From 21cde00b5a50f29643a19f1a270258a380104e15 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 2 Dec 2024 00:31:35 +0700 Subject: [PATCH 01/34] chore: add document --- docs/docs/engines/engine-extension.mdx | 153 +++++++++++++++---------- 1 file changed, 95 insertions(+), 58 deletions(-) diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx index 8a62cd813..84000767b 100644 --- a/docs/docs/engines/engine-extension.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -1,89 +1,126 @@ --- -title: Building Engine Extensions +title: Adding a Third-Party Engine to Cortex description: Cortex supports Engine Extensions to integrate both :ocal inference engines, and Remote APIs. --- -:::info -🚧 Cortex is currently under development, and this page is a stub for future development. -::: - - +We welcome suggestions and contributions to improve this integration process. Please feel free to submit issues or pull requests through our repository. From bd1bf91738d87418be8654dc7e485b1cdf3fce73 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 2 Dec 2024 15:13:34 +0700 Subject: [PATCH 02/34] feat: update engine interface --- docs/docs/engines/engine-extension.mdx | 96 ++++++++- engine/cli/commands/server_start_cmd.cc | 22 +- engine/controllers/engines.cc | 5 +- engine/cortex-common/EngineI.h | 30 +++ engine/services/engine_service.cc | 263 ++++++++++-------------- engine/services/engine_service.h | 10 + engine/services/hardware_service.cc | 2 +- engine/utils/config_yaml_utils.cc | 1 + engine/utils/config_yaml_utils.h | 5 +- engine/utils/file_manager_utils.cc | 1 + 10 files changed, 257 insertions(+), 178 deletions(-) diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx index 84000767b..6bb966f60 100644 --- a/docs/docs/engines/engine-extension.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -22,12 +22,32 @@ First, create an engine that implements the `EngineI.h` interface. Here's the in ```cpp class EngineI { public: - struct EngineLoadOption{}; - struct EngineUnloadOption{}; + struct RegisterLibraryOption { + std::vector paths; + }; + + struct EngineLoadOption { + // engine + std::filesystem::path engine_path; + std::filesystem::path cuda_path; + bool custom_engine_path; + + // logging + std::filesystem::path log_path; + int max_log_lines; + trantor::Logger::LogLevel log_level; + }; + + struct EngineUnloadOption { + bool unload_dll; + }; virtual ~EngineI() {} + virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; + virtual void Load(EngineLoadOption opts) = 0; + virtual void Unload(EngineUnloadOption opts) = 0; // Cortex.llamacpp interface methods @@ -65,7 +85,71 @@ class EngineI { }; ``` -Note that Cortex will call `Load` before loading any models and `Unload` when stopping the engine. +#### Lifecycle Management + +##### RegisterLibraryPath + +```cpp +virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; +``` + +This method is called during engine initialization to set up dynamic library search paths. For example, in Linux, we still have to use `LD_LIBRARY_PATH` to add CUDA dependencies to the search path. 
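+
+As a rough illustration, a minimal engine-side implementation might look like
+the sketch below. This is only a sketch, not Cortex's own code: the
+`ExampleEngine` class is hypothetical, and the body simply mirrors the
+`LD_LIBRARY_PATH` / `AddDllDirectory` handling that Cortex's loader code uses
+elsewhere in this patch.
+
+```cpp
+#include <cstdlib>
+#include <filesystem>
+#include <string>
+#if defined(_WIN32)
+#include <windows.h>
+#endif
+
+// ExampleEngine is assumed to derive from EngineI.
+void ExampleEngine::RegisterLibraryPath(RegisterLibraryOption opts) {
+#if defined(__linux__)
+  // Prepend the registered paths to LD_LIBRARY_PATH so dependent shared
+  // objects (e.g. CUDA libraries) can be resolved when the engine loads.
+  std::string merged;
+  for (const auto& p : opts.paths) {
+    std::error_code ec;
+    if (std::filesystem::exists(p, ec))  // skip invalid paths, never throw
+      merged += p.string() + ":";
+  }
+  if (const char* current = std::getenv("LD_LIBRARY_PATH"))
+    merged += current;
+  setenv("LD_LIBRARY_PATH", merged.c_str(), /*overwrite=*/1);
+#elif defined(_WIN32)
+  for (const auto& p : opts.paths) {
+    // Failures would be logged and ignored in a real engine; no exception
+    // may escape this method.
+    AddDllDirectory(p.wstring().c_str());
+  }
+#endif
+}
+```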
+ +**Parameters:** + +- `opts.paths`: Vector of filesystem paths that the engine should register + +**Implementation Requirements:** + +- Register provided paths for dynamic library loading +- Handle invalid paths gracefully +- Thread-safe implementation +- No exceptions should escape the method + +##### Load + +```cpp +virtual void Load(EngineLoadOption opts) = 0; +``` + +Initializes the engine with the provided configuration options. + +**Parameters:** + +- `engine_path`: Base path for engine files +- `cuda_path`: Path to CUDA installation +- `custom_engine_path`: Flag for using custom engine location +- `log_path`: Location for log files +- `max_log_lines`: Maximum number of lines per log file +- `log_level`: Logging verbosity level + +**Implementation Requirements:** + +- Validate all paths before use +- Initialize engine components +- Set up logging configuration +- Handle missing dependencies gracefully +- Clean initialization state in case of failures + +##### Unload + +```cpp +virtual void Unload(EngineUnloadOption opts) = 0; +``` + +Performs cleanup and shutdown of the engine. + +**Parameters:** + +- `unload_dll`: Boolean flag indicating whether to unload dynamic libraries + +**Implementation Requirements:** + +- Clean up all allocated resources +- Close file handles and connections +- Release memory +- Ensure proper shutdown of running models +- Handle cleanup in a thread-safe manner ### 2. Create a Dynamic Library @@ -98,7 +182,7 @@ To test your engine locally: 1. Create a directory structure following this hierarchy: -``` +```bash engines/ └── cortex.llamacpp/ └── mac-arm64/ @@ -107,12 +191,12 @@ engines/ └── version.txt ``` -2. Configure your engine: +1. Configure your engine: - Edit the `~/.cortexrc` file to register your engine name - Add your model with the appropriate engine field in `model.yaml` -3. Testing: +2. 
Testing: - Start the engine - Load your model - Verify functionality diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc index ba4f7bd82..3d52f3d25 100644 --- a/engine/cli/commands/server_start_cmd.cc +++ b/engine/cli/commands/server_start_cmd.cc @@ -1,9 +1,12 @@ #include "server_start_cmd.h" #include "commands/cortex_upd_cmd.h" +#include "services/engine_service.h" #include "utils/cortex_utils.h" -#include "utils/engine_constants.h" #include "utils/file_manager_utils.h" + +#if defined(_WIN32) || defined(_WIN64) #include "utils/widechar_conv.h" +#endif namespace commands { @@ -108,22 +111,9 @@ bool ServerStartCmd::Exec(const std::string& host, int port, std::cerr << "Could not start server: " << std::endl; return false; } else if (pid == 0) { - // No need to configure LD_LIBRARY_PATH for macOS -#if !defined(__APPLE__) || !defined(__MACH__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); + // Some engines requires to add lib search path before process being created + EngineService().RegisterEngineLibPath(); - auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif std::string p = cortex_utils::GetCurrentPath() + "/" + exe; execl(p.c_str(), exe.c_str(), "--start-server", "--config_file_path", get_config_file_path().c_str(), "--data_folder_path", diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 9e110bd66..8a5e5010b 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -23,10 +23,9 @@ std::string NormalizeEngine(const std::string& engine) { void Engines::ListEngine( const HttpRequestPtr& req, std::function&& callback) const { - std::vector supported_engines{kLlamaEngine, kOnnxEngine, - kTrtLlmEngine}; Json::Value ret; - for (const auto& engine : supported_engines) { + auto engine_names = engine_service_->GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { auto installed_engines = engine_service_->GetInstalledEngineVariants(engine); if (installed_engines.has_error()) { diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index 95ce605de..da2f5a5ab 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -7,8 +8,37 @@ #include "trantor/utils/Logger.h" class EngineI { public: + struct RegisterLibraryOption { + std::vector paths; + }; + + struct EngineLoadOption { + // engine + std::filesystem::path engine_path; + std::filesystem::path cuda_path; + bool custom_engine_path; + + // logging + std::filesystem::path log_path; + int max_log_lines; + trantor::Logger::LogLevel log_level; + }; + + struct EngineUnloadOption { + bool unload_dll; + }; + virtual ~EngineI() {} + /** + * Being called before starting process to register dependencies search paths. 
+ */ + virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; + + virtual void Load(EngineLoadOption opts) = 0; + + virtual void Unload(EngineUnloadOption opts) = 0; + // cortex.llamacpp interface virtual void HandleChatCompletion( std::shared_ptr json_body, diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index c52e32ef0..0df4a8ccb 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include "algorithm" #include "utils/archive_utils.h" #include "utils/engine_constants.h" @@ -179,6 +180,7 @@ cpp::result EngineService::UninstallEngineVariant( const std::string& engine, const std::optional version, const std::optional variant) { auto ne = NormalizeEngine(engine); + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -272,6 +274,7 @@ cpp::result EngineService::DownloadEngine( if (selected_variant == std::nullopt) { return cpp::fail("Failed to find a suitable variant for " + engine); } + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(engine)) { CTL_INF("Engine " << engine << " is already loaded, unloading it"); auto unload_res = UnloadEngine(engine); @@ -503,6 +506,7 @@ EngineService::SetDefaultEngineVariant(const std::string& engine, " is not installed yet!"); } + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -631,7 +635,6 @@ EngineService::GetInstalledEngineVariants(const std::string& engine) const { } bool EngineService::IsEngineLoaded(const std::string& engine) { - std::lock_guard lock(engines_mutex_); auto ne = NormalizeEngine(engine); return engines_.find(ne) != engines_.end(); } @@ -647,16 +650,43 @@ cpp::result EngineService::GetLoadedEngine( return engines_[ne].engine; } -cpp::result EngineService::LoadEngine( - const std::string& engine_name) { - auto ne = NormalizeEngine(engine_name); - - if (IsEngineLoaded(ne)) { - CTL_INF("Engine " << ne << " is already loaded"); - return {}; +void EngineService::RegisterEngineLibPath() { + auto engine_names = GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { + auto ne = NormalizeEngine(engine); + try { + auto engine_dir_path_res = GetEngineDirPath(engine); + if (engine_dir_path_res.has_error()) { + CTL_ERR( + "Could not get engine dir path: " << engine_dir_path_res.error()); + continue; + } + auto engine_dir_path = engine_dir_path_res.value().first; + auto custom_engine_path = engine_dir_path_res.value().second; + + auto dylib = std::make_unique(engine_dir_path.string(), + "engine"); + + auto cuda_path = file_manager_utils::GetCudaToolkitPath(ne); + // init + auto func = dylib->get_function("get_engine"); + auto engine = func(); + std::vector paths{}; + auto register_opts = EngineI::RegisterLibraryOption{ + .paths = paths, + }; + engine->RegisterLibraryPath(register_opts); + delete engine; + CTL_DBG("Register lib path for: " << engine); + } catch (const std::exception& e) { + CTL_WRN("Failed to registering engine lib path: " << e.what()); + } } +} - CTL_INF("Loading engine: " << ne); +cpp::result, std::string> +EngineService::GetEngineDirPath(const std::string& engine_name) { + auto ne = NormalizeEngine(engine_name); auto selected_engine_variant = GetDefaultEngineVariant(ne); @@ -672,6 +702,7 @@ cpp::result EngineService::LoadEngine( auto 
user_defined_engine_path = getenv("ENGINE_PATH"); #endif + auto custom_engine_path = user_defined_engine_path != nullptr; CTL_DBG("user defined engine path: " << user_defined_engine_path); const std::filesystem::path engine_dir_path = [&] { if (user_defined_engine_path != nullptr) { @@ -685,175 +716,99 @@ cpp::result EngineService::LoadEngine( } }(); - CTL_DBG("Engine path: " << engine_dir_path.string()); - if (!std::filesystem::exists(engine_dir_path)) { CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!"); return cpp::fail("Directory " + engine_dir_path.string() + " is not exist!"); } - CTL_INF("Engine path: " << engine_dir_path.string()); + CTL_INF("Engine path: " << engine_dir_path.string() + << ", custom_engine_path: " << custom_engine_path); + return std::make_pair(engine_dir_path, custom_engine_path); +} - try { -#if defined(_WIN32) - // TODO(?) If we only allow to load an engine at a time, the logic is simpler. - // We would like to support running multiple engines at the same time. Therefore, - // the adding/removing dll directory logic is quite complicated: - // 1. If llamacpp is loaded and new requested engine is tensorrt-llm: - // Unload the llamacpp dll directory then load the tensorrt-llm - // 2. If tensorrt-llm is loaded and new requested engine is llamacpp: - // Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful) - // 3. Add dll directory if met other conditions - - auto add_dll = [this](const std::string& e_type, - const std::filesystem::path& p) { - if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) { - CTL_DBG("Added dll directory: " << p.string()); - engines_[e_type].cookie = cookie; - } else { - CTL_WRN("Could not add dll directory: " << p.string()); - } +cpp::result EngineService::LoadEngine( + const std::string& engine_name) { + auto ne = NormalizeEngine(engine_name); - auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type); - if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str()); - cuda_cookie != 0) { - CTL_DBG("Added cuda dll directory: " << p.string()); - engines_[e_type].cuda_cookie = cuda_cookie; - } else { - CTL_WRN("Could not add cuda dll directory: " << p.string()); - } - }; + std::lock_guard lock(engines_mutex_); + if (IsEngineLoaded(ne)) { + CTL_INF("Engine " << ne << " is already loaded"); + return {}; + } -#if defined(_WIN32) - if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH")); -#else - if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); -#endif - should_use_dll_search_path) { - if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo && - should_use_dll_search_path) { - - { - std::lock_guard lock(engines_mutex_); - // Remove llamacpp dll directory - if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) { - CTL_WRN("Could not remove dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed dll directory: " << kLlamaRepo); - } - if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed cuda dll directory: " << kLlamaRepo); - } - } + CTL_INF("Loading engine: " << ne); - add_dll(ne, engine_dir_path); - } else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) { - // Do nothing - } else { - add_dll(ne, engine_dir_path); - } - } -#endif - { - std::lock_guard lock(engines_mutex_); - engines_[ne].dl = std::make_unique( - engine_dir_path.string(), "engine"); - } -#if defined(__linux__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string 
v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - CTL_INF("llamacpp_path: " << llamacpp_path); - // tensorrt is not supported for now - // auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); - - auto new_v = llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif + auto engine_dir_path_res = GetEngineDirPath(ne); + if (engine_dir_path_res.has_error()) { + return cpp::fail(engine_dir_path_res.error()); + } + auto engine_dir_path = engine_dir_path_res.value().first; + auto custom_engine_path = engine_dir_path_res.value().second; + + try { + auto dylib = + std::make_unique(engine_dir_path.string(), "engine"); + + auto config = file_manager_utils::GetCortexConfig(); + + auto log_path = + std::filesystem::path(config.logFolderPath) / + std::filesystem::path( + config.logLlamaCppPath); // for now seems like we use same log path + + // init + auto func = dylib->get_function("get_engine"); + auto engine_obj = func(); + auto load_opts = EngineI::EngineLoadOption{ + .engine_path = engine_dir_path, + .cuda_path = file_manager_utils::GetCudaToolkitPath(ne), + .custom_engine_path = custom_engine_path, + .log_path = log_path, + .max_log_lines = config.maxLogLines, + .log_level = logging_utils_helper::global_log_level, + }; + engine_obj->Load(load_opts); + + engines_[ne].engine = engine_obj; + engines_[ne].dl = std::move(dylib); + CTL_DBG("Engine loaded: " << ne); + return {}; } catch (const cortex_cpp::dylib::load_error& e) { CTL_ERR("Could not load engine: " << e.what()); - { - std::lock_guard lock(engines_mutex_); - engines_.erase(ne); - } + engines_.erase(ne); return cpp::fail("Could not load engine " + ne + ": " + e.what()); } - - { - std::lock_guard lock(engines_mutex_); - auto func = engines_[ne].dl->get_function("get_engine"); - engines_[ne].engine = func(); - - auto& en = std::get(engines_[ne].engine); - if (ne == kLlamaRepo) { //fix for llamacpp engine first - auto config = file_manager_utils::GetCortexConfig(); - if (en->IsSupported("SetFileLogger")) { - en->SetFileLogger(config.maxLogLines, - (std::filesystem::path(config.logFolderPath) / - std::filesystem::path(config.logLlamaCppPath)) - .string()); - } else { - CTL_WRN("Method SetFileLogger is not supported yet"); - } - if (en->IsSupported("SetLogLevel")) { - en->SetLogLevel(logging_utils_helper::global_log_level); - } else { - CTL_WRN("Method SetLogLevel is not supported yet"); - } - } - CTL_DBG("loaded engine: " << ne); - } - return {}; } cpp::result EngineService::UnloadEngine( const std::string& engine) { auto ne = NormalizeEngine(engine); - { - std::lock_guard lock(engines_mutex_); - if (!IsEngineLoaded(ne)) { - return cpp::fail("Engine " + ne + " is not loaded yet!"); - } - EngineI* e = std::get(engines_[ne].engine); - delete e; + LOG_INFO << "Unloading engine " << ne; -#if defined(_WIN32) - if (!RemoveDllDirectory(engines_[ne].cookie)) { - CTL_WRN("Could not remove dll directory: " << ne); - } else { - CTL_DBG("Removed dll directory: " << ne); - } - if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << ne); - } else { - CTL_DBG("Removed cuda dll directory: " << ne); - } -#endif - engines_.erase(ne); + std::lock_guard lock(engines_mutex_); + if (!IsEngineLoaded(ne)) { + return cpp::fail("Engine " + ne + " is not loaded yet!"); } - CTL_DBG("Unloaded engine " + ne); + auto* e = 
std::get(engines_[ne].engine); + auto unload_opts = EngineI::EngineUnloadOption{ + .unload_dll = true, + }; + e->Unload(unload_opts); + delete e; + engines_.erase(ne); + CTL_DBG("Engine unloaded: " + ne); return {}; } std::vector EngineService::GetLoadedEngines() { - { - std::lock_guard lock(engines_mutex_); - std::vector loaded_engines; - for (const auto& [key, value] : engines_) { - loaded_engines.push_back(value.engine); - } - return loaded_engines; + std::lock_guard lock(engines_mutex_); + std::vector loaded_engines; + for (const auto& [key, value] : engines_) { + loaded_engines.push_back(value.engine); } + return loaded_engines; } cpp::result @@ -899,6 +854,7 @@ cpp::result EngineService::UpdateEngine( CTL_INF("Default variant: " << default_variant->variant << ", version: " + default_variant->version); + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -955,3 +911,8 @@ cpp::result EngineService::UpdateEngine( .from = default_variant->version, .to = latest_version->tag_name}; } + +cpp::result, std::string> +EngineService::GetSupportedEngineNames() { + return file_manager_utils::GetCortexConfig().supportedEngines; +} diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 47d7c272f..5437cb496 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -65,6 +65,9 @@ class EngineService : public EngineServiceI { .cuda_driver_version = system_info_utils::GetDriverAndCudaVersion().second} {} + // just for initialize supported engines + EngineService() {}; + std::vector GetEngineInfoList() const; /** @@ -123,6 +126,10 @@ class EngineService : public EngineServiceI { cpp::result UpdateEngine( const std::string& engine); + cpp::result, std::string> GetSupportedEngineNames(); + + void RegisterEngineLibPath(); + private: cpp::result DownloadEngine( const std::string& engine, const std::string& version = "latest", @@ -134,6 +141,9 @@ class EngineService : public EngineServiceI { std::string GetMatchedVariant(const std::string& engine, const std::vector& variants); + cpp::result, std::string> + GetEngineDirPath(const std::string& engine_name); + cpp::result IsEngineVariantReady( const std::string& engine, const std::string& version, const std::string& variant); diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 681ca7578..a5890eab9 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -5,11 +5,11 @@ #if defined(_WIN32) || defined(_WIN64) #include #include +#include "utils/widechar_conv.h" #endif #include "cli/commands/cortex_upd_cmd.h" #include "database/hardware.h" #include "utils/cortex_utils.h" -#include "utils/widechar_conv.h" namespace services { diff --git a/engine/utils/config_yaml_utils.cc b/engine/utils/config_yaml_utils.cc index 4d6f47ebe..3c5e6b727 100644 --- a/engine/utils/config_yaml_utils.cc +++ b/engine/utils/config_yaml_utils.cc @@ -42,6 +42,7 @@ cpp::result CortexConfigMgr::DumpYamlConfig( node["noProxy"] = config.noProxy; node["verifyPeerSsl"] = config.verifyPeerSsl; node["verifyHostSsl"] = config.verifyHostSsl; + node["supportedEngines"] = config.supportedEngines; out_file << node; out_file.close(); diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index aa1b4027e..caaa4dacf 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -5,6 +5,7 @@ #include #include 
#include +#include "utils/engine_constants.h" #include "utils/logging_utils.h" #include "utils/result.hpp" #include "yaml-cpp/yaml.h" @@ -22,6 +23,8 @@ constexpr const auto kDefaultCorsEnabled = true; const std::vector kDefaultEnabledOrigins{ "http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"}; constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1"; +const std::vector kDefaultSupportedEngines{ + kLlamaEngine, kOnnxEngine, kTrtLlmEngine}; struct CortexConfig { std::string logFolderPath; @@ -59,6 +62,7 @@ struct CortexConfig { bool verifyPeerSsl; bool verifyHostSsl; + std::vector supportedEngines; }; class CortexConfigMgr { @@ -82,5 +86,4 @@ class CortexConfigMgr { CortexConfig FromYaml(const std::string& path, const CortexConfig& default_cfg); }; - } // namespace config_yaml_utils diff --git a/engine/utils/file_manager_utils.cc b/engine/utils/file_manager_utils.cc index 11128a275..4f2a68804 100644 --- a/engine/utils/file_manager_utils.cc +++ b/engine/utils/file_manager_utils.cc @@ -185,6 +185,7 @@ config_yaml_utils::CortexConfig GetDefaultConfig() { .noProxy = config_yaml_utils::kDefaultNoProxy, .verifyPeerSsl = true, .verifyHostSsl = true, + .supportedEngines = config_yaml_utils::kDefaultSupportedEngines, }; } From 08fbb8a80e081459cbbf4a539432ac8dc3c3bedb Mon Sep 17 00:00:00 2001 From: James Date: Mon, 2 Dec 2024 00:31:35 +0700 Subject: [PATCH 03/34] chore: add document --- docs/docs/engines/engine-extension.mdx | 153 +++++++++++++++---------- 1 file changed, 95 insertions(+), 58 deletions(-) diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx index 8a62cd813..84000767b 100644 --- a/docs/docs/engines/engine-extension.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -1,89 +1,126 @@ --- -title: Building Engine Extensions +title: Adding a Third-Party Engine to Cortex description: Cortex supports Engine Extensions to integrate both :ocal inference engines, and Remote APIs. --- -:::info -🚧 Cortex is currently under development, and this page is a stub for future development. -::: - - +We welcome suggestions and contributions to improve this integration process. Please feel free to submit issues or pull requests through our repository. From 7d9cf3b851766e5be41b75250e79a0706c5a61a3 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 2 Dec 2024 15:13:34 +0700 Subject: [PATCH 04/34] feat: update engine interface --- docs/docs/engines/engine-extension.mdx | 96 ++++++++- engine/cli/commands/server_start_cmd.cc | 22 +-- engine/controllers/engines.cc | 5 +- engine/cortex-common/EngineI.h | 30 +++ engine/services/engine_service.cc | 246 +++++++++++------------- engine/services/engine_service.h | 12 +- engine/services/hardware_service.cc | 2 +- engine/utils/config_yaml_utils.cc | 1 + engine/utils/config_yaml_utils.h | 5 +- engine/utils/file_manager_utils.cc | 1 + 10 files changed, 253 insertions(+), 167 deletions(-) diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx index 84000767b..6bb966f60 100644 --- a/docs/docs/engines/engine-extension.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -22,12 +22,32 @@ First, create an engine that implements the `EngineI.h` interface. 
Here's the in ```cpp class EngineI { public: - struct EngineLoadOption{}; - struct EngineUnloadOption{}; + struct RegisterLibraryOption { + std::vector paths; + }; + + struct EngineLoadOption { + // engine + std::filesystem::path engine_path; + std::filesystem::path cuda_path; + bool custom_engine_path; + + // logging + std::filesystem::path log_path; + int max_log_lines; + trantor::Logger::LogLevel log_level; + }; + + struct EngineUnloadOption { + bool unload_dll; + }; virtual ~EngineI() {} + virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; + virtual void Load(EngineLoadOption opts) = 0; + virtual void Unload(EngineUnloadOption opts) = 0; // Cortex.llamacpp interface methods @@ -65,7 +85,71 @@ class EngineI { }; ``` -Note that Cortex will call `Load` before loading any models and `Unload` when stopping the engine. +#### Lifecycle Management + +##### RegisterLibraryPath + +```cpp +virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; +``` + +This method is called during engine initialization to set up dynamic library search paths. For example, in Linux, we still have to use `LD_LIBRARY_PATH` to add CUDA dependencies to the search path. + +**Parameters:** + +- `opts.paths`: Vector of filesystem paths that the engine should register + +**Implementation Requirements:** + +- Register provided paths for dynamic library loading +- Handle invalid paths gracefully +- Thread-safe implementation +- No exceptions should escape the method + +##### Load + +```cpp +virtual void Load(EngineLoadOption opts) = 0; +``` + +Initializes the engine with the provided configuration options. + +**Parameters:** + +- `engine_path`: Base path for engine files +- `cuda_path`: Path to CUDA installation +- `custom_engine_path`: Flag for using custom engine location +- `log_path`: Location for log files +- `max_log_lines`: Maximum number of lines per log file +- `log_level`: Logging verbosity level + +**Implementation Requirements:** + +- Validate all paths before use +- Initialize engine components +- Set up logging configuration +- Handle missing dependencies gracefully +- Clean initialization state in case of failures + +##### Unload + +```cpp +virtual void Unload(EngineUnloadOption opts) = 0; +``` + +Performs cleanup and shutdown of the engine. + +**Parameters:** + +- `unload_dll`: Boolean flag indicating whether to unload dynamic libraries + +**Implementation Requirements:** + +- Clean up all allocated resources +- Close file handles and connections +- Release memory +- Ensure proper shutdown of running models +- Handle cleanup in a thread-safe manner ### 2. Create a Dynamic Library @@ -98,7 +182,7 @@ To test your engine locally: 1. Create a directory structure following this hierarchy: -``` +```bash engines/ └── cortex.llamacpp/ └── mac-arm64/ @@ -107,12 +191,12 @@ engines/ └── version.txt ``` -2. Configure your engine: +1. Configure your engine: - Edit the `~/.cortexrc` file to register your engine name - Add your model with the appropriate engine field in `model.yaml` -3. Testing: +2. 
Testing: - Start the engine - Load your model - Verify functionality diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc index ba4f7bd82..3d52f3d25 100644 --- a/engine/cli/commands/server_start_cmd.cc +++ b/engine/cli/commands/server_start_cmd.cc @@ -1,9 +1,12 @@ #include "server_start_cmd.h" #include "commands/cortex_upd_cmd.h" +#include "services/engine_service.h" #include "utils/cortex_utils.h" -#include "utils/engine_constants.h" #include "utils/file_manager_utils.h" + +#if defined(_WIN32) || defined(_WIN64) #include "utils/widechar_conv.h" +#endif namespace commands { @@ -108,22 +111,9 @@ bool ServerStartCmd::Exec(const std::string& host, int port, std::cerr << "Could not start server: " << std::endl; return false; } else if (pid == 0) { - // No need to configure LD_LIBRARY_PATH for macOS -#if !defined(__APPLE__) || !defined(__MACH__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); + // Some engines requires to add lib search path before process being created + EngineService().RegisterEngineLibPath(); - auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif std::string p = cortex_utils::GetCurrentPath() + "/" + exe; execl(p.c_str(), exe.c_str(), "--start-server", "--config_file_path", get_config_file_path().c_str(), "--data_folder_path", diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 3d3c0c037..1d0223d9a 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -23,10 +23,9 @@ std::string NormalizeEngine(const std::string& engine) { void Engines::ListEngine( const HttpRequestPtr& req, std::function&& callback) const { - std::vector supported_engines{kLlamaEngine, kOnnxEngine, - kTrtLlmEngine}; Json::Value ret; - for (const auto& engine : supported_engines) { + auto engine_names = engine_service_->GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { auto installed_engines = engine_service_->GetInstalledEngineVariants(engine); if (installed_engines.has_error()) { diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index 51e19c124..11866a708 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -7,8 +8,37 @@ #include "trantor/utils/Logger.h" class EngineI { public: + struct RegisterLibraryOption { + std::vector paths; + }; + + struct EngineLoadOption { + // engine + std::filesystem::path engine_path; + std::filesystem::path cuda_path; + bool custom_engine_path; + + // logging + std::filesystem::path log_path; + int max_log_lines; + trantor::Logger::LogLevel log_level; + }; + + struct EngineUnloadOption { + bool unload_dll; + }; + virtual ~EngineI() {} + /** + * Being called before starting process to register dependencies search paths. 
+ */ + virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; + + virtual void Load(EngineLoadOption opts) = 0; + + virtual void Unload(EngineUnloadOption opts) = 0; + // cortex.llamacpp interface virtual void HandleChatCompletion( std::shared_ptr json_body, diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index fe5317c7d..4f2122f6b 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "algorithm" #include "database/engines.h" @@ -17,6 +18,7 @@ #include "utils/semantic_version_utils.h" #include "utils/system_info_utils.h" #include "utils/url_parser.h" + namespace { std::string GetSuitableCudaVersion(const std::string& engine, const std::string& cuda_driver_version) { @@ -701,6 +703,87 @@ cpp::result EngineService::LoadEngine( CTL_INF("Loading engine: " << ne); + auto engine_dir_path_res = GetEngineDirPath(ne); + if (engine_dir_path_res.has_error()) { + return cpp::fail(engine_dir_path_res.error()); + } + auto engine_dir_path = engine_dir_path_res.value().first; + auto custom_engine_path = engine_dir_path_res.value().second; + + try { + auto dylib = + std::make_unique(engine_dir_path.string(), "engine"); + + auto config = file_manager_utils::GetCortexConfig(); + + auto log_path = + std::filesystem::path(config.logFolderPath) / + std::filesystem::path( + config.logLlamaCppPath); // for now seems like we use same log path + + // init + auto func = dylib->get_function("get_engine"); + auto engine_obj = func(); + auto load_opts = EngineI::EngineLoadOption{ + .engine_path = engine_dir_path, + .cuda_path = file_manager_utils::GetCudaToolkitPath(ne), + .custom_engine_path = custom_engine_path, + .log_path = log_path, + .max_log_lines = config.maxLogLines, + .log_level = logging_utils_helper::global_log_level, + }; + engine_obj->Load(load_opts); + + engines_[ne].engine = engine_obj; + engines_[ne].dl = std::move(dylib); + + CTL_DBG("Engine loaded: " << ne); + return {}; + } catch (const cortex_cpp::dylib::load_error& e) { + CTL_ERR("Could not load engine: " << e.what()); + engines_.erase(ne); + return cpp::fail("Could not load engine " + ne + ": " + e.what()); + } +} + +void EngineService::RegisterEngineLibPath() { + auto engine_names = GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { + auto ne = NormalizeEngine(engine); + try { + auto engine_dir_path_res = GetEngineDirPath(engine); + if (engine_dir_path_res.has_error()) { + CTL_ERR( + "Could not get engine dir path: " << engine_dir_path_res.error()); + continue; + } + auto engine_dir_path = engine_dir_path_res.value().first; + auto custom_engine_path = engine_dir_path_res.value().second; + + auto dylib = std::make_unique(engine_dir_path.string(), + "engine"); + + auto cuda_path = file_manager_utils::GetCudaToolkitPath(ne); + // init + auto func = dylib->get_function("get_engine"); + auto engine = func(); + std::vector paths{}; + auto register_opts = EngineI::RegisterLibraryOption{ + .paths = paths, + }; + engine->RegisterLibraryPath(register_opts); + delete engine; + CTL_DBG("Register lib path for: " << engine); + } catch (const std::exception& e) { + CTL_WRN("Failed to registering engine lib path: " << e.what()); + } + } +} + +cpp::result, std::string> +EngineService::GetEngineDirPath(const std::string& engine_name) { + auto ne = NormalizeEngine(engine_name); + auto selected_engine_variant = GetDefaultEngineVariant(ne); if (selected_engine_variant.has_error()) { @@ 
-715,6 +798,7 @@ cpp::result EngineService::LoadEngine( auto user_defined_engine_path = getenv("ENGINE_PATH"); #endif + auto custom_engine_path = user_defined_engine_path != nullptr; CTL_DBG("user defined engine path: " << user_defined_engine_path); const std::filesystem::path engine_dir_path = [&] { if (user_defined_engine_path != nullptr) { @@ -728,157 +812,38 @@ cpp::result EngineService::LoadEngine( } }(); - CTL_DBG("Engine path: " << engine_dir_path.string()); - if (!std::filesystem::exists(engine_dir_path)) { CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!"); return cpp::fail("Directory " + engine_dir_path.string() + " is not exist!"); } - CTL_INF("Engine path: " << engine_dir_path.string()); - - try { -#if defined(_WIN32) - // TODO(?) If we only allow to load an engine at a time, the logic is simpler. - // We would like to support running multiple engines at the same time. Therefore, - // the adding/removing dll directory logic is quite complicated: - // 1. If llamacpp is loaded and new requested engine is tensorrt-llm: - // Unload the llamacpp dll directory then load the tensorrt-llm - // 2. If tensorrt-llm is loaded and new requested engine is llamacpp: - // Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful) - // 3. Add dll directory if met other conditions - - auto add_dll = [this](const std::string& e_type, - const std::filesystem::path& p) { - if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) { - CTL_DBG("Added dll directory: " << p.string()); - engines_[e_type].cookie = cookie; - } else { - CTL_WRN("Could not add dll directory: " << p.string()); - } - - auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type); - if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str()); - cuda_cookie != 0) { - CTL_DBG("Added cuda dll directory: " << p.string()); - engines_[e_type].cuda_cookie = cuda_cookie; - } else { - CTL_WRN("Could not add cuda dll directory: " << p.string()); - } - }; - -#if defined(_WIN32) - if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH")); -#else - if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); -#endif - should_use_dll_search_path) { - if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo && - should_use_dll_search_path) { - - { - - // Remove llamacpp dll directory - if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) { - CTL_WRN("Could not remove dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed dll directory: " << kLlamaRepo); - } - if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed cuda dll directory: " << kLlamaRepo); - } - } - - add_dll(ne, engine_dir_path); - } else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) { - // Do nothing - } else { - add_dll(ne, engine_dir_path); - } - } -#endif - engines_[ne].dl = - std::make_unique(engine_dir_path.string(), "engine"); -#if defined(__linux__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - CTL_INF("llamacpp_path: " << llamacpp_path); - // tensorrt is not supported for now - // auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); - - auto new_v = llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif - - } catch 
(const cortex_cpp::dylib::load_error& e) { - CTL_ERR("Could not load engine: " << e.what()); - engines_.erase(ne); - return cpp::fail("Could not load engine " + ne + ": " + e.what()); - } - - auto func = engines_[ne].dl->get_function("get_engine"); - engines_[ne].engine = func(); - - auto& en = std::get(engines_[ne].engine); - if (ne == kLlamaRepo) { //fix for llamacpp engine first - auto config = file_manager_utils::GetCortexConfig(); - if (en->IsSupported("SetFileLogger")) { - en->SetFileLogger(config.maxLogLines, - (std::filesystem::path(config.logFolderPath) / - std::filesystem::path(config.logLlamaCppPath)) - .string()); - } else { - CTL_WRN("Method SetFileLogger is not supported yet"); - } - if (en->IsSupported("SetLogLevel")) { - en->SetLogLevel(logging_utils_helper::global_log_level); - } else { - CTL_WRN("Method SetLogLevel is not supported yet"); - } - } - CTL_DBG("loaded engine: " << ne); - return {}; + CTL_INF("Engine path: " << engine_dir_path.string() + << ", custom_engine_path: " << custom_engine_path); + return std::make_pair(engine_dir_path, custom_engine_path); } cpp::result EngineService::UnloadEngine( const std::string& engine) { auto ne = NormalizeEngine(engine); std::lock_guard lock(engines_mutex_); - { - if (!IsEngineLoaded(ne)) { - return cpp::fail("Engine " + ne + " is not loaded yet!"); - } - if (std::holds_alternative(engines_[ne].engine)) { - delete std::get(engines_[ne].engine); - } else { - delete std::get(engines_[ne].engine); - } - -#if defined(_WIN32) - if (!RemoveDllDirectory(engines_[ne].cookie)) { - CTL_WRN("Could not remove dll directory: " << ne); - } else { - CTL_DBG("Removed dll directory: " << ne); - } - if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << ne); - } else { - CTL_DBG("Removed cuda dll directory: " << ne); - } -#endif + if (!IsEngineLoaded(ne)) { + return cpp::fail("Engine " + ne + " is not loaded yet!"); + } + if (std::holds_alternative(engines_[ne].engine)) { + LOG_INFO << "Unloading engine " << ne; + auto* e = std::get(engines_[ne].engine); + auto unload_opts = EngineI::EngineUnloadOption{ + .unload_dll = true, + }; + e->Unload(unload_opts); + delete e; engines_.erase(ne); + } else { + delete std::get(engines_[ne].engine); } - CTL_DBG("Unloaded engine " + ne); + + CTL_DBG("Engine unloaded: " + ne); return {}; } @@ -1097,4 +1062,9 @@ cpp::result EngineService::GetRemoteModels( } else { return res; } -} \ No newline at end of file +} + +cpp::result, std::string> +EngineService::GetSupportedEngineNames() { + return file_manager_utils::GetCortexConfig().supportedEngines; +} diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index ab274825d..8299655f2 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -13,7 +13,6 @@ #include "cortex-common/cortexpythoni.h" #include "cortex-common/remote_enginei.h" #include "database/engines.h" -#include "extensions/remote-engine/remote_engine.h" #include "services/download_service.h" #include "utils/cpuid/cpu_info.h" #include "utils/dylib.h" @@ -75,6 +74,9 @@ class EngineService : public EngineServiceI { .cuda_driver_version = system_info_utils::GetDriverAndCudaVersion().second} {} + // just for initialize supported engines + EngineService() {}; + std::vector GetEngineInfoList() const; /** @@ -148,6 +150,9 @@ class EngineService : public EngineServiceI { cpp::result GetRemoteModels( const std::string& engine_name); + cpp::result, std::string> GetSupportedEngineNames(); + + void 
RegisterEngineLibPath(); private: bool IsEngineLoaded(const std::string& engine); @@ -162,7 +167,10 @@ class EngineService : public EngineServiceI { std::string GetMatchedVariant(const std::string& engine, const std::vector& variants); + cpp::result, std::string> + GetEngineDirPath(const std::string& engine_name); + cpp::result IsEngineVariantReady( const std::string& engine, const std::string& version, const std::string& variant); -}; \ No newline at end of file +}; diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 681ca7578..a5890eab9 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -5,11 +5,11 @@ #if defined(_WIN32) || defined(_WIN64) #include #include +#include "utils/widechar_conv.h" #endif #include "cli/commands/cortex_upd_cmd.h" #include "database/hardware.h" #include "utils/cortex_utils.h" -#include "utils/widechar_conv.h" namespace services { diff --git a/engine/utils/config_yaml_utils.cc b/engine/utils/config_yaml_utils.cc index ed6437256..c7a696df4 100644 --- a/engine/utils/config_yaml_utils.cc +++ b/engine/utils/config_yaml_utils.cc @@ -49,6 +49,7 @@ cpp::result CortexConfigMgr::DumpYamlConfig( node["verifyHostSsl"] = config.verifyHostSsl; node["sslCertPath"] = config.sslCertPath; node["sslKeyPath"] = config.sslKeyPath; + node["supportedEngines"] = config.supportedEngines; out_file << node; out_file.close(); diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index d36cc48e0..f9925ea86 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -3,6 +3,7 @@ #include #include #include +#include "utils/engine_constants.h" #include "utils/result.hpp" namespace config_yaml_utils { @@ -18,6 +19,8 @@ constexpr const auto kDefaultCorsEnabled = true; const std::vector kDefaultEnabledOrigins{ "http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"}; constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1"; +const std::vector kDefaultSupportedEngines{ + kLlamaEngine, kOnnxEngine, kTrtLlmEngine}; struct CortexConfig { std::string logFolderPath; @@ -57,6 +60,7 @@ struct CortexConfig { bool verifyHostSsl; std::string sslCertPath; std::string sslKeyPath; + std::vector supportedEngines; }; class CortexConfigMgr { @@ -80,5 +84,4 @@ class CortexConfigMgr { CortexConfig FromYaml(const std::string& path, const CortexConfig& default_cfg); }; - } // namespace config_yaml_utils diff --git a/engine/utils/file_manager_utils.cc b/engine/utils/file_manager_utils.cc index ca3d0c07b..338abadac 100644 --- a/engine/utils/file_manager_utils.cc +++ b/engine/utils/file_manager_utils.cc @@ -187,6 +187,7 @@ config_yaml_utils::CortexConfig GetDefaultConfig() { .verifyHostSsl = true, .sslCertPath = "", .sslKeyPath = "", + .supportedEngines = config_yaml_utils::kDefaultSupportedEngines, }; } From 3548342ebab9cdcc7d4becbb499a95d20298f976 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Tue, 10 Dec 2024 08:51:16 +0700 Subject: [PATCH 05/34] Feat: init python engine --- engine/CMakeLists.txt | 1 + engine/config/model_config.h | 316 ++++++++++++++++++ .../extensions/python-engine/python_engine.cc | 0 .../extensions/python-engine/python_engine.h | 0 engine/extensions/template_renderer.cc | 136 ++++++++ engine/extensions/template_renderer.h | 40 +++ engine/utils/config_yaml_utils.h | 2 +- engine/utils/engine_constants.h | 1 + engine/utils/environment_constants.h | 3 + engine/vcpkg.json | 3 +- 10 files changed, 500 insertions(+), 2 
deletions(-) create mode 100644 engine/extensions/python-engine/python_engine.cc create mode 100644 engine/extensions/python-engine/python_engine.h create mode 100644 engine/extensions/template_renderer.cc create mode 100644 engine/extensions/template_renderer.h create mode 100644 engine/utils/environment_constants.h diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index eae09d439..0206d3858 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -142,6 +142,7 @@ file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cortex_openapi.h" add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc + ${CMAKE_CURRENT_SOURCE_DIR}/extensions/template_renderer.cc ) target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/engine/config/model_config.h b/engine/config/model_config.h index 7d4076ee5..a3be375c5 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -1,6 +1,9 @@ #pragma once #include +#include +#include +#include #include #include #include @@ -343,4 +346,317 @@ struct ModelConfig { } }; +struct Endpoint { + std::string method; + std::string path; + std::string transform_request; + std::string transform_response; +}; + +struct PythonModelConfig { + // General Metadata + std::string id; + std::string model; + std::string name; + int version; + + // Inference Parameters + Endpoint load_model; + Endpoint destroy; + Endpoint inference; + Endpoint heath_check; + std::vector extra_endpoints; + + // Model Load Parameters + int port; + std::string log_path; + std::string log_level; + std::string environments; + std::vector command; // New command field + std::string engine; + Json::Value extra_params; // Accept dynamic extra parameters + + // Method to convert C++ struct to YAML + std::string ToYaml() const { + YAML::Emitter out; + out << YAML::BeginMap; + + out << YAML::Key << "id" << YAML::Value << id; + out << YAML::Key << "model" << YAML::Value << model; + out << YAML::Key << "name" << YAML::Value << name; + out << YAML::Key << "version" << YAML::Value << version; + + // Inference Parameters + out << YAML::Key << "load_model" << YAML::Value << YAML::BeginMap; + out << YAML::Key << "method" << YAML::Value << load_model.method; + out << YAML::Key << "path" << YAML::Value << load_model.path; + out << YAML::Key << "transform_request" << YAML::Value + << load_model.transform_request; + out << YAML::Key << "transform_response" << YAML::Value + << load_model.transform_response; + out << YAML::EndMap; + + out << YAML::Key << "destroy" << YAML::Value << YAML::BeginMap; + out << YAML::Key << "method" << YAML::Value << destroy.method; + out << YAML::Key << "path" << YAML::Value << destroy.path; + out << YAML::EndMap; + + out << YAML::Key << "inference" << YAML::Value << YAML::BeginMap; + out << YAML::Key << "method" << YAML::Value << inference.method; + out << YAML::Key << "path" << YAML::Value << inference.path; + out << YAML::EndMap; + + out << YAML::Key << "extra_endpoints" << YAML::Value << YAML::BeginSeq; + for (const auto& endpoint : extra_endpoints) { + out << YAML::BeginMap; + out << YAML::Key << "method" << YAML::Value << endpoint.method; + out << YAML::Key << "path" << YAML::Value << endpoint.path; + out << YAML::EndMap; + } + out << YAML::EndSeq; + + // Model Load Parameters + out << YAML::Key << "port" << YAML::Value << port; + out << YAML::Key << "log_path" << YAML::Value << log_path; + out << YAML::Key << "log_level" << YAML::Value + << log_level; 
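+  // Runtime settings follow: the environment name, the launch command
+  // (serialized as a YAML list), the engine name, and the dynamic
+  // extra_params map.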
+ out << YAML::Key << "environments" << YAML::Value + << environments; + + // Serialize command as YAML list + out << YAML::Key << "command" << YAML::Value << YAML::BeginSeq; + for (const auto& cmd : command) { + out << cmd; + } + out << YAML::EndSeq; + + out << YAML::Key << "engine" << YAML::Value << engine; + + // Serialize extra_params as YAML + out << YAML::Key << "extra_params" << YAML::Value << YAML::BeginMap; + for (Json::ValueConstIterator iter = extra_params.begin(); + iter != extra_params.end(); ++iter) { + out << YAML::Key << iter.key().asString() << YAML::Value + << iter->asString(); + } + out << YAML::EndMap; + return out.c_str(); + } + + // Method to populate struct from YAML file + void ReadFromYaml(const std::string& filePath) { + YAML::Node config = YAML::LoadFile(filePath); + + if (config["id"]) + id = config["id"].as(); + if (config["model"]) + model = config["model"].as(); + if (config["name"]) + name = config["name"].as(); + if (config["version"]) + version = config["version"].as(); + + // Inference Parameters + + auto ip = config; + if (ip["load_model"]) { + load_model.method = + ip["load_model"]["method"].as(); + load_model.path = + ip["load_model"]["path"].as(); + load_model.transform_request = + ip["load_model"]["transform_request"].as(); + load_model.transform_response = + ip["load_model"]["transform_response"].as(); + } + if (ip["destroy"]) { + destroy.method = + ip["destroy"]["method"].as(); + destroy.path = + ip["destroy"]["path"].as(); + } + if (ip["inference"]) { + inference.method = + ip["inference"]["method"].as(); + inference.path = + ip["inference"]["path"].as(); + } + if (ip["extra_endpoints"] && ip["extra_endpoints"].IsSequence()) { + for (const auto& endpoint : ip["extra_endpoints"]) { + Endpoint e; + e.method = endpoint["method"].as(); + e.path = endpoint["path"].as(); + extra_endpoints.push_back(e); + } + } + + + // Model Load Parameters + + auto mlp = config; + if (mlp["port"]) + port = mlp["port"].as(); + if (mlp["log_path"]) + log_path = mlp["log_path"].as(); + if (mlp["log_level"]) + log_level = mlp["log_level"].as(); + if (mlp["environments"]) + environments = mlp["environments"].as(); + if (mlp["engine"]) + engine = mlp["engine"].as(); + + if (mlp["command"] && mlp["command"].IsSequence()) { + for (const auto& cmd : mlp["command"]) { + command.push_back(cmd.as()); + } + } + + if (mlp["extra_params"]) { + for (YAML::const_iterator it = mlp["extra_params"].begin(); + it != mlp["extra_params"].end(); ++it) { + extra_params[it->first.as()] = + it->second.as(); + } + } + + } + + // Method to convert the struct to JSON + std::string ToJson() const { + Json::Value root; + + root["id"] = id; + root["model"] = model; + root["name"] = name; + root["version"] = version; + + // Inference Parameters + root["inference_parameters"]["load_model"]["method"] = + load_model.method; + root["inference_parameters"]["load_model"]["path"] = + load_model.path; + root["inference_parameters"]["load_model"]["transform_request"] = + load_model.transform_request; + root["inference_parameters"]["load_model"]["transform_response"] = + load_model.transform_response; + + root["inference_parameters"]["destroy"]["method"] = + destroy.method; + root["inference_parameters"]["destroy"]["path"] = + destroy.path; + + root["inference_parameters"]["inference"]["method"] = + inference.method; + root["inference_parameters"]["inference"]["path"] = + inference.path; + + for (const auto& endpoint : extra_endpoints) { + Json::Value e; + e["method"] = endpoint.method; + e["path"] = 
endpoint.path; + root["inference_parameters"]["extra_endpoints"].append(e); + } + + // Model Load Parameters + root["model_load_params"]["port"] = port; + root["model_load_params"]["log_path"] = log_path; + root["model_load_params"]["log_level"] = log_level; + root["model_load_params"]["environments"] = environments; + + // Serialize command as JSON array + for (const auto& cmd : command) { + root["model_load_params"]["command"].append(cmd); + } + + root["model_load_params"]["engine"] = engine; + root["model_load_params"]["extra_params"] = + extra_params; // Serialize the JSON value directly + + Json::StreamWriterBuilder writer; + return Json::writeString(writer, root); + } + + // Method to populate struct from JSON + void FromJson(const std::string& jsonString) { + Json::CharReaderBuilder reader; + Json::Value root; + std::string errs; + std::istringstream s(jsonString); + + if (!Json::parseFromStream(reader, s, &root, &errs)) { + std::cerr << "Error parsing JSON: " << errs << std::endl; + return; + } + + if (root.isMember("id")) + id = root["id"].asString(); + if (root.isMember("model")) + model = root["model"].asString(); + if (root.isMember("name")) + name = root["name"].asString(); + if (root.isMember("version")) + version = root["version"].asInt(); + + // Inference Parameters + if (root.isMember("inference_parameters")) { + const Json::Value& ip = root["inference_parameters"]; + if (ip.isMember("load_model")) { + load_model.method = + ip["load_model"]["method"].asString(); + load_model.path = + ip["load_model"]["path"].asString(); + load_model.transform_request = + ip["load_model"]["transform_request"].asString(); + load_model.transform_response = + ip["load_model"]["transform_response"].asString(); + } + if (ip.isMember("destroy")) { + destroy.method = + ip["destroy"]["method"].asString(); + destroy.path = ip["destroy"]["path"].asString(); + } + if (ip.isMember("inference")) { + inference.method = + ip["inference"]["method"].asString(); + inference.path = + ip["inference"]["path"].asString(); + } + if (ip.isMember("extra_endpoints")) { + for (const auto& endpoint : ip["extra_endpoints"]) { + Endpoint e; + e.method = endpoint["method"].asString(); + e.path = endpoint["path"].asString(); + extra_endpoints.push_back(e); + } + } + } + + // Model Load Parameters + if (root.isMember("model_load_params")) { + const Json::Value& mlp = root["model_load_params"]; + if (mlp.isMember("port")) + port = mlp["port"].asInt(); + if (mlp.isMember("log_path")) + log_path = mlp["log_path"].asString(); + if (mlp.isMember("log_level")) + log_level = mlp["log_level"].asString(); + if (mlp.isMember("environments")) + environments = mlp["environments"].asString(); + if (mlp.isMember("engine")) + engine = mlp["engine"].asString(); + + if (mlp.isMember("command")) { + for (const auto& cmd : mlp["command"]) { + command.push_back(cmd.asString()); + } + } + + if (mlp.isMember("extra_params")) { + extra_params = + mlp["extra_params"]; // Directly assign the JSON value + } + } + } +}; + } // namespace config diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc new file mode 100644 index 000000000..e69de29bb diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h new file mode 100644 index 000000000..e69de29bb diff --git a/engine/extensions/template_renderer.cc b/engine/extensions/template_renderer.cc new file mode 100644 index 000000000..15514d17c --- /dev/null +++ 
b/engine/extensions/template_renderer.cc @@ -0,0 +1,136 @@ +#if defined(_WIN32) || defined(_WIN64) +#define NOMINMAX +#undef min +#undef max +#endif +#include "template_renderer.h" +#include +#include +#include "utils/logging_utils.h" +namespace remote_engine { +TemplateRenderer::TemplateRenderer() { + // Configure Inja environment + env_.set_trim_blocks(true); + env_.set_lstrip_blocks(true); + + // Add tojson function for all value types + env_.add_callback("tojson", 1, [](inja::Arguments& args) { + if (args.empty()) { + return nlohmann::json(nullptr); + } + const auto& value = *args[0]; + + if (value.is_string()) { + return nlohmann::json(std::string("\"") + value.get() + + "\""); + } + return value; + }); +} + +std::string TemplateRenderer::Render(const std::string& tmpl, + const Json::Value& data) { + try { + // Convert Json::Value to nlohmann::json + auto json_data = ConvertJsonValue(data); + + // Create the input data structure expected by the template + nlohmann::json template_data; + template_data["input_request"] = json_data; + + // Debug output + LOG_DEBUG << "Template: " << tmpl; + LOG_DEBUG << "Data: " << template_data.dump(2); + + // Render template + std::string result = env_.render(tmpl, template_data); + + // Clean up any potential double quotes in JSON strings + result = std::regex_replace(result, std::regex("\\\"\\\""), "\""); + + LOG_DEBUG << "Result: " << result; + + // Validate JSON + auto parsed = nlohmann::json::parse(result); + + return result; + } catch (const std::exception& e) { + LOG_ERROR << "Template rendering failed: " << e.what(); + LOG_ERROR << "Template: " << tmpl; + throw std::runtime_error(std::string("Template rendering failed: ") + + e.what()); + } +} + +nlohmann::json TemplateRenderer::ConvertJsonValue(const Json::Value& input) { + if (input.isNull()) { + return nullptr; + } else if (input.isBool()) { + return input.asBool(); + } else if (input.isInt()) { + return input.asInt(); + } else if (input.isUInt()) { + return input.asUInt(); + } else if (input.isDouble()) { + return input.asDouble(); + } else if (input.isString()) { + return input.asString(); + } else if (input.isArray()) { + nlohmann::json arr = nlohmann::json::array(); + for (const auto& element : input) { + arr.push_back(ConvertJsonValue(element)); + } + return arr; + } else if (input.isObject()) { + nlohmann::json obj = nlohmann::json::object(); + for (const auto& key : input.getMemberNames()) { + obj[key] = ConvertJsonValue(input[key]); + } + return obj; + } + return nullptr; +} + +Json::Value TemplateRenderer::ConvertNlohmannJson(const nlohmann::json& input) { + if (input.is_null()) { + return Json::Value(); + } else if (input.is_boolean()) { + return Json::Value(input.get()); + } else if (input.is_number_integer()) { + return Json::Value(input.get()); + } else if (input.is_number_unsigned()) { + return Json::Value(input.get()); + } else if (input.is_number_float()) { + return Json::Value(input.get()); + } else if (input.is_string()) { + return Json::Value(input.get()); + } else if (input.is_array()) { + Json::Value arr(Json::arrayValue); + for (const auto& element : input) { + arr.append(ConvertNlohmannJson(element)); + } + return arr; + } else if (input.is_object()) { + Json::Value obj(Json::objectValue); + for (auto it = input.begin(); it != input.end(); ++it) { + obj[it.key()] = ConvertNlohmannJson(it.value()); + } + return obj; + } + return Json::Value(); +} + +std::string TemplateRenderer::RenderFile(const std::string& template_path, + const Json::Value& data) { + try { + // 
Convert Json::Value to nlohmann::json + auto json_data = ConvertJsonValue(data); + + // Load and render template + return env_.render_file(template_path, json_data); + } catch (const std::exception& e) { + throw std::runtime_error(std::string("Template file rendering failed: ") + + e.what()); + } +} +} // namespace remote_engine \ No newline at end of file diff --git a/engine/extensions/template_renderer.h b/engine/extensions/template_renderer.h new file mode 100644 index 000000000..f59e7cc93 --- /dev/null +++ b/engine/extensions/template_renderer.h @@ -0,0 +1,40 @@ +#pragma once + +#include + +#include +#include "json/json.h" +#include "trantor/utils/Logger.h" +// clang-format off +#if defined(_WIN32) || defined(_WIN64) +#define NOMINMAX +#undef min +#undef max +#endif +#include +#include +// clang-format on +namespace remote_engine { +class TemplateRenderer { + public: + TemplateRenderer(); + ~TemplateRenderer() = default; + + // Convert Json::Value to nlohmann::json + static nlohmann::json ConvertJsonValue(const Json::Value& input); + + // Convert nlohmann::json to Json::Value + static Json::Value ConvertNlohmannJson(const nlohmann::json& input); + + // Render template with data + std::string Render(const std::string& tmpl, const Json::Value& data); + + // Load template from file and render + std::string RenderFile(const std::string& template_path, + const Json::Value& data); + + private: + inja::Environment env_; +}; + +} // namespace remote_engine \ No newline at end of file diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index caaa4dacf..674bd594c 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -24,7 +24,7 @@ const std::vector kDefaultEnabledOrigins{ "http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"}; constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1"; const std::vector kDefaultSupportedEngines{ - kLlamaEngine, kOnnxEngine, kTrtLlmEngine}; + kLlamaEngine, kOnnxEngine, kTrtLlmEngine, kPythonEngine}; struct CortexConfig { std::string logFolderPath; diff --git a/engine/utils/engine_constants.h b/engine/utils/engine_constants.h index 5dab49936..ac96c94ae 100644 --- a/engine/utils/engine_constants.h +++ b/engine/utils/engine_constants.h @@ -3,6 +3,7 @@ constexpr const auto kOnnxEngine = "onnxruntime"; constexpr const auto kLlamaEngine = "llama-cpp"; constexpr const auto kTrtLlmEngine = "tensorrt-llm"; +constexpr const auto kPythonEngine = "python-engine"; constexpr const auto kOnnxRepo = "cortex.onnx"; constexpr const auto kLlamaRepo = "cortex.llamacpp"; diff --git a/engine/utils/environment_constants.h b/engine/utils/environment_constants.h new file mode 100644 index 000000000..f14df67f8 --- /dev/null +++ b/engine/utils/environment_constants.h @@ -0,0 +1,3 @@ +#pragma once + +constexpr const auto kWhisperVQEnvironment = "whispervq"; \ No newline at end of file diff --git a/engine/vcpkg.json b/engine/vcpkg.json index 36fa322a3..17eb32f4c 100644 --- a/engine/vcpkg.json +++ b/engine/vcpkg.json @@ -13,6 +13,7 @@ "sqlitecpp", "trantor", "indicators", - "lfreist-hwinfo" + "lfreist-hwinfo", + "inja" ] } From 6958db86165f60922729a844cf451c66cab42008 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Tue, 10 Dec 2024 09:09:55 +0700 Subject: [PATCH 06/34] Fix: conflict --- engine/extensions/template_renderer.cc | 2 +- engine/extensions/template_renderer.h | 2 +- engine/services/engine_service.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/engine/extensions/template_renderer.cc b/engine/extensions/template_renderer.cc index 15514d17c..32e7d72f5 100644 --- a/engine/extensions/template_renderer.cc +++ b/engine/extensions/template_renderer.cc @@ -7,7 +7,7 @@ #include #include #include "utils/logging_utils.h" -namespace remote_engine { +namespace extensions { TemplateRenderer::TemplateRenderer() { // Configure Inja environment env_.set_trim_blocks(true); diff --git a/engine/extensions/template_renderer.h b/engine/extensions/template_renderer.h index f59e7cc93..7eccef2eb 100644 --- a/engine/extensions/template_renderer.h +++ b/engine/extensions/template_renderer.h @@ -14,7 +14,7 @@ #include #include // clang-format on -namespace remote_engine { +namespace extensions { class TemplateRenderer { public: TemplateRenderer(); diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 1e877e388..5e2622240 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -686,7 +686,7 @@ cpp::result EngineService::LoadEngine( CTL_INF("Engine " << ne << " is already loaded"); return {}; } -} + // Check for remote engine if (remote_engine::IsRemoteEngine(engine_name)) { From ff2c02df8e84afd0bf0a9625190a001655442494 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Wed, 11 Dec 2024 16:17:54 +0700 Subject: [PATCH 07/34] feat: add python engine implementation --- engine/CMakeLists.txt | 1 - engine/cli/CMakeLists.txt | 3 +- engine/config/model_config.h | 268 +++--- engine/cortex-common/EngineI.h | 3 + .../extensions/python-engine/python_engine.cc | 768 ++++++++++++++++++ .../extensions/python-engine/python_engine.h | 160 ++++ .../extensions/remote-engine/remote_engine.h | 4 +- .../remote-engine/template_renderer.cc | 136 ---- .../remote-engine/template_renderer.h | 40 - 9 files changed, 1062 insertions(+), 321 deletions(-) delete mode 100644 engine/extensions/remote-engine/template_renderer.cc delete mode 100644 engine/extensions/remote-engine/template_renderer.h diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 2c6e55012..db34a8346 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -148,7 +148,6 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/openai_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/anthropic_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/template_renderer.cc ) diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt index 51382dc13..833bd4c03 100644 --- a/engine/cli/CMakeLists.txt +++ b/engine/cli/CMakeLists.txt @@ -85,7 +85,8 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/openai_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/anthropic_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/template_renderer.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/python-engine/python_engine.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/template_renderer.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/easywsclient.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/download_progress.cc ${CMAKE_CURRENT_SOURCE_DIR}/../utils/config_yaml_utils.cc diff --git a/engine/config/model_config.h b/engine/config/model_config.h index f7a0701d9..78f62d9ca 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -546,10 +546,12 @@ struct PythonModelConfig { 
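+  // A minimal, hypothetical model.yml that this struct is intended to
+  // round-trip (field names follow the members below; the concrete values
+  // are illustrative, not from this patch):
+  //
+  //   id: whispervq
+  //   name: WhisperVQ
+  //   port: "3348"
+  //   script: src/app.py
+  //   log_path: whispervq.log
+  //   log_level: INFO
+  //   environment: whispervq
+  //   command: [python]
+  //   engine: python-engine
+  //   extra_params:
+  //     device: cpu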
std::vector extra_endpoints; // Model Load Parameters - int port; + std::string port; + std::string files; + std::string script; std::string log_path; std::string log_level; - std::string environments; + std::string environment; std::vector command; // New command field std::string engine; Json::Value extra_params; // Accept dynamic extra parameters @@ -595,11 +597,11 @@ struct PythonModelConfig { // Model Load Parameters out << YAML::Key << "port" << YAML::Value << port; + out << YAML::Key << "files" << YAML::Value << files; + out << YAML::Key << "script" << YAML::Value << script; out << YAML::Key << "log_path" << YAML::Value << log_path; - out << YAML::Key << "log_level" << YAML::Value - << log_level; - out << YAML::Key << "environments" << YAML::Value - << environments; + out << YAML::Key << "log_level" << YAML::Value << log_level; + out << YAML::Key << "environment" << YAML::Value << environment; // Serialize command as YAML list out << YAML::Key << "command" << YAML::Value << YAML::BeginSeq; @@ -636,71 +638,67 @@ struct PythonModelConfig { // Inference Parameters - auto ip = config; - if (ip["load_model"]) { - load_model.method = - ip["load_model"]["method"].as(); - load_model.path = - ip["load_model"]["path"].as(); - load_model.transform_request = - ip["load_model"]["transform_request"].as(); - load_model.transform_response = - ip["load_model"]["transform_response"].as(); - } - if (ip["destroy"]) { - destroy.method = - ip["destroy"]["method"].as(); - destroy.path = - ip["destroy"]["path"].as(); - } - if (ip["inference"]) { - inference.method = - ip["inference"]["method"].as(); - inference.path = - ip["inference"]["path"].as(); - } - if (ip["extra_endpoints"] && ip["extra_endpoints"].IsSequence()) { - for (const auto& endpoint : ip["extra_endpoints"]) { - Endpoint e; - e.method = endpoint["method"].as(); - e.path = endpoint["path"].as(); - extra_endpoints.push_back(e); - } + auto ip = config; + if (ip["load_model"]) { + load_model.method = ip["load_model"]["method"].as(); + load_model.path = ip["load_model"]["path"].as(); + load_model.transform_request = + ip["load_model"]["transform_request"].as(); + load_model.transform_response = + ip["load_model"]["transform_response"].as(); + } + if (ip["destroy"]) { + destroy.method = ip["destroy"]["method"].as(); + destroy.path = ip["destroy"]["path"].as(); + } + if (ip["inference"]) { + inference.method = ip["inference"]["method"].as(); + inference.path = ip["inference"]["path"].as(); + } + if (ip["extra_endpoints"] && ip["extra_endpoints"].IsSequence()) { + for (const auto& endpoint : ip["extra_endpoints"]) { + Endpoint e; + e.method = endpoint["method"].as(); + e.path = endpoint["path"].as(); + extra_endpoints.push_back(e); } - + } // Model Load Parameters - auto mlp = config; - if (mlp["port"]) - port = mlp["port"].as(); - if (mlp["log_path"]) - log_path = mlp["log_path"].as(); - if (mlp["log_level"]) - log_level = mlp["log_level"].as(); - if (mlp["environments"]) - environments = mlp["environments"].as(); - if (mlp["engine"]) - engine = mlp["engine"].as(); - - if (mlp["command"] && mlp["command"].IsSequence()) { - for (const auto& cmd : mlp["command"]) { - command.push_back(cmd.as()); - } + auto mlp = config; + if (mlp["port"]) + port = mlp["port"].as(); + if (mlp["files"]) + files = mlp["files"].as(); + if (mlp["script"]) + script = mlp["script"].as(); + if (mlp["log_path"]) + log_path = mlp["log_path"].as(); + if (mlp["log_level"]) + log_level = mlp["log_level"].as(); + if (mlp["environment"]) + environment = mlp["environment"].as(); + 
if (mlp["engine"]) + engine = mlp["engine"].as(); + + if (mlp["command"] && mlp["command"].IsSequence()) { + for (const auto& cmd : mlp["command"]) { + command.push_back(cmd.as()); } + } - if (mlp["extra_params"]) { - for (YAML::const_iterator it = mlp["extra_params"].begin(); - it != mlp["extra_params"].end(); ++it) { - extra_params[it->first.as()] = - it->second.as(); - } + if (mlp["extra_params"]) { + for (YAML::const_iterator it = mlp["extra_params"].begin(); + it != mlp["extra_params"].end(); ++it) { + extra_params[it->first.as()] = + it->second.as(); } - + } } // Method to convert the struct to JSON - std::string ToJson() const { + Json::Value ToJson() const { Json::Value root; root["id"] = id; @@ -709,49 +707,41 @@ struct PythonModelConfig { root["version"] = version; // Inference Parameters - root["inference_parameters"]["load_model"]["method"] = - load_model.method; - root["inference_parameters"]["load_model"]["path"] = - load_model.path; - root["inference_parameters"]["load_model"]["transform_request"] = - load_model.transform_request; - root["inference_parameters"]["load_model"]["transform_response"] = - load_model.transform_response; - - root["inference_parameters"]["destroy"]["method"] = - destroy.method; - root["inference_parameters"]["destroy"]["path"] = - destroy.path; - - root["inference_parameters"]["inference"]["method"] = - inference.method; - root["inference_parameters"]["inference"]["path"] = - inference.path; + root["load_model"]["method"] = load_model.method; + root["load_model"]["path"] = load_model.path; + root["load_model"]["transform_request"] = load_model.transform_request; + root["load_model"]["transform_response"] = load_model.transform_response; + + root["destroy"]["method"] = destroy.method; + root["destroy"]["path"] = destroy.path; + + root["inference"]["method"] = inference.method; + root["inference"]["path"] = inference.path; for (const auto& endpoint : extra_endpoints) { Json::Value e; e["method"] = endpoint.method; e["path"] = endpoint.path; - root["inference_parameters"]["extra_endpoints"].append(e); + root["extra_endpoints"].append(e); } // Model Load Parameters - root["model_load_params"]["port"] = port; - root["model_load_params"]["log_path"] = log_path; - root["model_load_params"]["log_level"] = log_level; - root["model_load_params"]["environments"] = environments; + root["port"] = port; + root["log_path"] = log_path; + root["log_level"] = log_level; + root["environment"] = environment; + root["files"] = files; + root["script"] = script; // Serialize command as JSON array for (const auto& cmd : command) { - root["model_load_params"]["command"].append(cmd); + root["command"].append(cmd); } - root["model_load_params"]["engine"] = engine; - root["model_load_params"]["extra_params"] = - extra_params; // Serialize the JSON value directly + root["engine"] = engine; + root["extra_params"] = extra_params; // Serialize the JSON value directly - Json::StreamWriterBuilder writer; - return Json::writeString(writer, root); + return root; } // Method to populate struct from JSON @@ -776,64 +766,60 @@ struct PythonModelConfig { version = root["version"].asInt(); // Inference Parameters - if (root.isMember("inference_parameters")) { - const Json::Value& ip = root["inference_parameters"]; - if (ip.isMember("load_model")) { - load_model.method = - ip["load_model"]["method"].asString(); - load_model.path = - ip["load_model"]["path"].asString(); - load_model.transform_request = - ip["load_model"]["transform_request"].asString(); - load_model.transform_response = - 
ip["load_model"]["transform_response"].asString(); - } - if (ip.isMember("destroy")) { - destroy.method = - ip["destroy"]["method"].asString(); - destroy.path = ip["destroy"]["path"].asString(); - } - if (ip.isMember("inference")) { - inference.method = - ip["inference"]["method"].asString(); - inference.path = - ip["inference"]["path"].asString(); - } - if (ip.isMember("extra_endpoints")) { - for (const auto& endpoint : ip["extra_endpoints"]) { - Endpoint e; - e.method = endpoint["method"].asString(); - e.path = endpoint["path"].asString(); - extra_endpoints.push_back(e); - } + + const Json::Value& ip = root; + if (ip.isMember("load_model")) { + load_model.method = ip["load_model"]["method"].asString(); + load_model.path = ip["load_model"]["path"].asString(); + load_model.transform_request = + ip["load_model"]["transform_request"].asString(); + load_model.transform_response = + ip["load_model"]["transform_response"].asString(); + } + if (ip.isMember("destroy")) { + destroy.method = ip["destroy"]["method"].asString(); + destroy.path = ip["destroy"]["path"].asString(); + } + if (ip.isMember("inference")) { + inference.method = ip["inference"]["method"].asString(); + inference.path = ip["inference"]["path"].asString(); + } + if (ip.isMember("extra_endpoints")) { + for (const auto& endpoint : ip["extra_endpoints"]) { + Endpoint e; + e.method = endpoint["method"].asString(); + e.path = endpoint["path"].asString(); + extra_endpoints.push_back(e); } } // Model Load Parameters - if (root.isMember("model_load_params")) { - const Json::Value& mlp = root["model_load_params"]; - if (mlp.isMember("port")) - port = mlp["port"].asInt(); - if (mlp.isMember("log_path")) - log_path = mlp["log_path"].asString(); - if (mlp.isMember("log_level")) - log_level = mlp["log_level"].asString(); - if (mlp.isMember("environments")) - environments = mlp["environments"].asString(); - if (mlp.isMember("engine")) - engine = mlp["engine"].asString(); - - if (mlp.isMember("command")) { - for (const auto& cmd : mlp["command"]) { - command.push_back(cmd.asString()); - } - } - if (mlp.isMember("extra_params")) { - extra_params = - mlp["extra_params"]; // Directly assign the JSON value + const Json::Value& mlp = root; + if (mlp.isMember("port")) + port = mlp["port"].asString(); + if (mlp.isMember("log_path")) + log_path = mlp["log_path"].asString(); + if (mlp.isMember("log_level")) + log_level = mlp["log_level"].asString(); + if (mlp.isMember("environment")) + environment = mlp["environment"].asString(); + if (mlp.isMember("engine")) + engine = mlp["engine"].asString(); + if (mlp.isMember("files")) + files = mlp["files"].asString(); + if (mlp.isMember("script")) + script = mlp["script"].asString(); + + if (mlp.isMember("command")) { + for (const auto& cmd : mlp["command"]) { + command.push_back(cmd.asString()); } } + + if (mlp.isMember("extra_params")) { + extra_params = mlp["extra_params"]; // Directly assign the JSON value + } } }; diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index 11866a708..64fe3c838 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -69,4 +69,7 @@ class EngineI { virtual void SetLogLevel(trantor::Logger::LogLevel logLevel) = 0; virtual Json::Value GetRemoteModels() = 0; + virtual void HandleRequest( + std::shared_ptr json_body, + std::function&& callback) = 0; }; diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index e69de29bb..83f85126c 100644 --- 
a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -0,0 +1,768 @@ +#include "python_engine.h" +#include +#include +#include +#include +namespace python_engine { +constexpr const int k200OK = 200; +constexpr const int k400BadRequest = 400; +constexpr const int k409Conflict = 409; +constexpr const int k500InternalServerError = 500; +constexpr const int kFileLoggerOption = 0; + +static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, + std::string* data) { + data->append(ptr, size * nmemb); + return size * nmemb; +} + +PythonEngine::PythonEngine() { + curl_global_init(CURL_GLOBAL_ALL); +} + +PythonEngine::~PythonEngine() { + curl_global_cleanup(); +} + +config::PythonModelConfig* PythonEngine::GetModelConfig( + const std::string& model) { + std::shared_lock lock(models_mutex_); + auto it = models_.find(model); + if (it != models_.end()) { + return &it->second; + } + return nullptr; +} +std::string constructWindowsCommandLine(const std::vector& args) { + std::string cmdLine; + for (const auto& arg : args) { + // Simple escaping for Windows command line + std::string escapedArg = arg; + if (escapedArg.find(' ') != std::string::npos) { + // Wrap in quotes and escape existing quotes + for (char& c : escapedArg) { + if (c == '"') + c = '\\'; + } + escapedArg = "\"" + escapedArg + "\""; + } + cmdLine += escapedArg + " "; + } + return cmdLine; +} + +std::vector convertToArgv(const std::vector& args) { + std::vector argv; + for (const auto& arg : args) { + argv.push_back(const_cast(arg.c_str())); + } + argv.push_back(nullptr); + return argv; +} + +pid_t PythonEngine::SpawnProcess(const std::string& model, + const std::vector& command) { + try { +#ifdef _WIN32 + // Windows process creation + STARTUPINFOA si = {0}; + PROCESS_INFORMATION pi = {0}; + si.cb = sizeof(si); + + // Construct command line + std::string cmdLine = constructWindowsCommandLine(command); + + // Convert string to char* for Windows API + char commandBuffer[4096]; + strncpy_s(commandBuffer, cmdLine.c_str(), sizeof(commandBuffer)); + + if (!CreateProcessA(NULL, // lpApplicationName + commandBuffer, // lpCommandLine + NULL, // lpProcessAttributes + NULL, // lpThreadAttributes + FALSE, // bInheritHandles + 0, // dwCreationFlags + NULL, // lpEnvironment + NULL, // lpCurrentDirectory + &si, // lpStartupInfo + &pi // lpProcessInformation + )) { + throw std::runtime_error("Failed to create process on Windows"); + } + + // Store the process ID + pid_t pid = pi.dwProcessId; + processMap[model] = pid; + + // Close handles to avoid resource leaks + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + + return pid; + +#elif __APPLE__ || __linux__ + // POSIX process creation + pid_t pid; + + // Convert command vector to char*[] + std::vector argv = convertToArgv(command); + for (auto c : command) { + std::cout << c << " " << std::endl; + } + + // Use posix_spawn for cross-platform compatibility + int spawn_result = posix_spawn(&pid, // pid output + command[0].c_str(), // executable path + NULL, // file actions + NULL, // spawn attributes + argv.data(), // argument vector + NULL // environment (inherit) + ); + + if (spawn_result != 0) { + throw std::runtime_error("Failed to spawn process"); + } + + // Store the process ID + processMap[model] = pid; + return pid; + +#else +#error Unsupported platform +#endif + } catch (const std::exception& e) { + LOG_ERROR << "Process spawning error: " << e.what(); + return -1; + } +} +bool PythonEngine::TerminateProcess(const std::string& model) 
{ + auto it = processMap.find(model); + if (it == processMap.end()) { + LOG_ERROR << "No process found for model: " << model + << ", removing from list running models."; + models_.erase(model); + return false; + } + +#ifdef _WIN32 + HANDLE hProcess = OpenProcess(PROCESS_TERMINATE, FALSE, it->second); + if (hProcess == NULL) { + LOG_ERROR << "Failed to open process"; + return false; + } + + bool terminated = TerminateProcess(hProcess, 0) == TRUE; + CloseHandle(hProcess); + + if (terminated) { + processMap.erase(it); + return true; + } + +#elif __APPLE__ || __linux__ + int result = kill(it->second, SIGTERM); + if (result == 0) { + processMap.erase(it); + return true; + } +#endif + + return false; +} +CurlResponse PythonEngine::MakeGetRequest(const std::string& model, + const std::string& path) { + auto config = models_[model]; + CURL* curl = curl_easy_init(); + CurlResponse response; + + if (!curl) { + response.error = true; + response.error_message = "Failed to initialize CURL"; + return response; + } + + std::string full_url = "http://localhost:" + config.port + path; + + struct curl_slist* headers = nullptr; + + headers = curl_slist_append(headers, "Content-Type: application/json"); + + curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + + std::string response_string; + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); + + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + response.error = true; + response.error_message = curl_easy_strerror(res); + } else { + response.body = response_string; + } + + curl_slist_free_all(headers); + curl_easy_cleanup(curl); + return response; +} +CurlResponse PythonEngine::MakeDeleteRequest(const std::string& model, + const std::string& path) { + auto config = models_[model]; + CURL* curl = curl_easy_init(); + CurlResponse response; + + if (!curl) { + response.error = true; + response.error_message = "Failed to initialize CURL"; + return response; + } + std::string full_url = "http://localhost:" + config.port + path; + + curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); + curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE"); + + std::string response_string; + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); + + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + response.error = true; + response.error_message = curl_easy_strerror(res); + } else { + response.body = response_string; + } + + curl_easy_cleanup(curl); + return response; +} + +CurlResponse PythonEngine::MakePostRequest(const std::string& model, + const std::string& path, + const std::string& body) { + auto config = models_[model]; + CURL* curl = curl_easy_init(); + CurlResponse response; + + if (!curl) { + response.error = true; + response.error_message = "Failed to initialize CURL"; + return response; + } + std::string full_url = "http://localhost:" + config.port + path; + + struct curl_slist* headers = nullptr; + headers = curl_slist_append(headers, "Content-Type: application/json"); + + curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str()); + + std::string response_string; + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); + + CURLcode res = 
curl_easy_perform(curl); + if (res != CURLE_OK) { + response.error = true; + response.error_message = curl_easy_strerror(res); + } else { + response.body = response_string; + } + + curl_slist_free_all(headers); + curl_easy_cleanup(curl); + return response; +} + +bool PythonEngine::LoadModelConfig(const std::string& model, + const std::string& yaml_path) { + try { + config::PythonModelConfig config; + config.ReadFromYaml(yaml_path); + std::unique_lock lock(models_mutex_); + models_[model] = config; + } catch (const std::exception& e) { + LOG_ERROR << "Failed to load model config: " << e.what(); + return false; + } + + return true; +} + +void PythonEngine::GetModels( + std::shared_ptr json_body, + std::function&& callback) { + + Json::Value response_json; + Json::Value model_array(Json::arrayValue); + + for (const auto& pair : models_) { + auto val = pair.second.ToJson(); + model_array.append(val); + } + + response_json["object"] = "list"; + response_json["data"] = model_array; + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json)); +} + +void PythonEngine::LoadModel( + std::shared_ptr json_body, + std::function&& callback) { + pid_t pid; + if (!json_body->isMember("model") || !json_body->isMember("model_path")) { + Json::Value error; + error["error"] = "Missing required fields: model or model_path"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + + const std::string& model = (*json_body)["model"].asString(); + const std::string& model_path = (*json_body)["model_path"].asString(); + if (models_.find(model) != models_.end()) { + Json::Value error; + error["error"] = "Model already loaded!"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k409Conflict; + callback(std::move(status), std::move(error)); + return; + } + + if (!LoadModelConfig(model, model_path)) { + Json::Value error; + error["error"] = "Failed to load model configuration"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + callback(std::move(status), std::move(error)); + return; + } + auto model_config = models_[model]; + try { + std::string data_folder_path = + "/home/thuan/cortexcpp/environments/"; // To do: will be removed with cortex data path + std::string model_folder_path = + "/home/thuan/cortexcpp/models/cortex.so/whispervq/fp16/"; // To do: will be removed with cortex model path +#ifdef _WIN32 + auto executable = std::filesystem::path(data_folder_path) / + std::filesystem::path(model_config.environment) / + std::filesystem::path("Scripts"); +#else + auto executable = std::filesystem::path(data_folder_path) / + std::filesystem::path(model_config.environment) / + std::filesystem::path("bin"); +#endif + std::cout << "executable string: " << executable.string() + << data_folder_path << " " << model_config.environment + << std::endl; + auto executable_str = + (executable / std::filesystem::path(model_config.command[0])).string(); + std::cout << "executable string: " << executable_str << std::endl; + auto command = model_config.command; + command[0] = executable_str; + 
command.push_back((std::filesystem::path(model_folder_path) / + std::filesystem::path(model_config.script)) + .string()); + std::list args{"--port", model_config.port, + "--log_path", model_config.log_path, + "--log_level", model_config.log_level}; + if (!model_config.extra_params.isNull() && + model_config.extra_params.isObject()) { + for (const auto& key : model_config.extra_params.getMemberNames()) { + const Json::Value& value = model_config.extra_params[key]; + + // Convert key to string with -- prefix + std::string param_key = "--" + key; + + // Handle different JSON value types + if (value.isString()) { + args.emplace_back(param_key); + args.emplace_back(value.asString()); + } else if (value.isInt()) { + args.emplace_back(param_key); + args.emplace_back(std::to_string(value.asInt())); + } else if (value.isDouble()) { + args.emplace_back(param_key); + args.emplace_back(std::to_string(value.asDouble())); + } else if (value.isBool()) { + // For boolean, only add the flag if true + if (value.asBool()) { + args.emplace_back(param_key); + } + } + } + } + + // Add the parsed arguments to the command + command.insert(command.end(), args.begin(), args.end()); + pid = SpawnProcess(model, command); + if (pid == -1) { + std::unique_lock lock(models_mutex_); + if (models_.find(model) != models_.end()) { + models_.erase(model); + } + + Json::Value error; + error["error"] = "Fail to spawn process with pid -1"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + callback(std::move(status), std::move(error)); + return; + } + } catch (const std::exception& e) { + std::unique_lock lock(models_mutex_); + if (models_.find(model) != models_.end()) { + models_.erase(model); + } + + Json::Value error; + error["error"] = e.what(); + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value response; + response["status"] = + "Model loaded successfully with pid: " + std::to_string(pid); + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + callback(std::move(status), std::move(response)); +} + +void PythonEngine::UnloadModel( + std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("model")) { + Json::Value error; + error["error"] = "Missing required field: model"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + + const std::string& model = (*json_body)["model"].asString(); + + { + std::unique_lock lock(models_mutex_); + if (TerminateProcess(model)) { + models_.erase(model); + } else { + Json::Value error; + error["error"] = "Fail to terminate process with id: " + + std::to_string(processMap[model]); + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + } + + Json::Value response; + response["status"] = "Model unloaded successfully"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = 
k200OK; + callback(std::move(status), std::move(response)); +} + +void PythonEngine::HandleChatCompletion( + std::shared_ptr json_body, + std::function&& callback) {} + +void PythonEngine::HandleRequest( + std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("model") || !json_body->isMember("method") || + !json_body->isMember("path")) { + Json::Value error; + error["error"] = + "Missing required field: model, method and path are required!"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + std::string method = (*json_body)["method"].asString(); + std::string path = (*json_body)["path"].asString(); + std::string transform_request = + (*json_body).get("transform_request", "").asString(); + std::string transform_response = + (*json_body).get("transform_response", "").asString(); + std::string model = (*json_body)["model"].asString(); + Json::Value body = (*json_body)["body"]; + + // Transform Request + std::string transformed_request; + if (!transform_request.empty()) { + + try { + // Validate JSON body + if (!body || body.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } + + // Render with error handling + try { + transformed_request = renderer_.Render(transform_request, *json_body); + } catch (const std::exception& e) { + throw std::runtime_error("Template rendering error: " + + std::string(e.what())); + } + } catch (const std::exception& e) { + // Log error and potentially rethrow or handle accordingly + LOG_WARN << "Error in TransformRequest: " << e.what(); + LOG_WARN << "Using original request body"; + transformed_request = body.toStyledString(); + } + } else { + transformed_request = body.toStyledString(); + } + + // End Transform request + + CurlResponse response; + if (method == "post") { + response = MakePostRequest(model, path, transformed_request); + } else if (method == "get") { + response = MakeGetRequest(model, path); + } else if (method == "delete") { + response = MakeDeleteRequest(model, path); + } else { + Json::Value error; + error["error"] = + "method not supported! 
Supported methods are: post, get, delete"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + + if (response.error) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + Json::Value error; + error["error"] = response.error_message; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value response_json; + Json::Reader reader; + if (!reader.parse(response.body, response_json)) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + Json::Value error; + error["error"] = "Failed to parse response"; + callback(std::move(status), std::move(error)); + return; + } + + if (!transform_response.empty()) { + // Transform Response + std::string response_str; + try { + // Validate JSON body + if (!response_json || response_json.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } + // Render with error handling + try { + response_str = renderer_.Render(transform_response, response_json); + } catch (const std::exception& e) { + throw std::runtime_error("Template rendering error: " + + std::string(e.what())); + } + } catch (const std::exception& e) { + // Log error and potentially rethrow or handle accordingly + LOG_WARN << "Error in TransformRequest: " << e.what(); + LOG_WARN << "Using original request body"; + response_str = response_json.toStyledString(); + } + + Json::Reader reader_final; + Json::Value response_json_final; + if (!reader_final.parse(response_str, response_json_final)) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + Json::Value error; + error["error"] = "Failed to parse response"; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json_final)); + } else { + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json)); + } +} + +void PythonEngine::GetModelStatus( + std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("model")) { + Json::Value error; + error["error"] = "Missing required field: model"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + auto model = json_body->get("model", "").asString(); + auto model_config = models_[model]; + auto health_endpoint = model_config.heath_check; + auto response_health = MakeGetRequest(model, health_endpoint.path); + + if (response_health.error) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + Json::Value error; + error["error"] = response_health.error_message; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value response; + response["model"] = 
model; + response["model_loaded"] = true; + response["model_data"] = model_config.ToJson(); + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + callback(std::move(status), std::move(response)); +} + +// Implement remaining virtual functions +void PythonEngine::HandleEmbedding( + std::shared_ptr, + std::function&& callback) { + callback(Json::Value(), Json::Value()); +} + +bool PythonEngine::IsSupported(const std::string& f) { + if (f == "HandleChatCompletion" || f == "LoadModel" || f == "UnloadModel" || + f == "GetModelStatus" || f == "GetModels" || f == "SetFileLogger" || + f == "SetLogLevel") { + return true; + } + return false; +} + +bool PythonEngine::SetFileLogger(int max_log_lines, + const std::string& log_path) { + if (!async_file_logger_) { + async_file_logger_ = std::make_unique(); + } + + async_file_logger_->setFileName(log_path); + async_file_logger_->setMaxLines(max_log_lines); // Keep last 100000 lines + async_file_logger_->startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + if (async_file_logger_) + async_file_logger_->output_(msg, len); + }, + [&]() { + if (async_file_logger_) + async_file_logger_->flush(); + }); + freopen(log_path.c_str(), "w", stderr); + freopen(log_path.c_str(), "w", stdout); + return true; +} + +void PythonEngine::SetLogLevel(trantor::Logger::LogLevel log_level) { + trantor::Logger::setLogLevel(log_level); +} + +void PythonEngine::RegisterLibraryPath(RegisterLibraryOption opts) { + +}; + +void PythonEngine::Load(EngineLoadOption opts) { + // Develop register model here on loading engine +}; + +void PythonEngine::Unload(EngineUnloadOption opts) {}; + +// extern "C" { +// EngineI* get_engine() { +// return new PythonEngine(); +// } +// } +} // namespace python_engine \ No newline at end of file diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index e69de29bb..fbc88b40e 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -0,0 +1,160 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "cortex-common/EngineI.h" +#include "extensions/template_renderer.h" +#include "utils/file_logger.h" +#include "config/model_config.h" +#ifdef _WIN32 + #include + #include +#elif __APPLE__ || __linux__ + #include + #include + #include + #include + #include +#endif +// Helper for CURL response +namespace python_engine{ +struct StreamContext +{ + std::shared_ptr> callback; + std::string buffer; +}; + +static size_t StreamWriteCallback(char *ptr, size_t size, size_t nmemb, + void *userdata) +{ + auto *context = static_cast(userdata); + std::string chunk(ptr, size * nmemb); + + context->buffer += chunk; + + // Process complete lines + size_t pos; + while ((pos = context->buffer.find('\n')) != std::string::npos) + { + std::string line = context->buffer.substr(0, pos); + context->buffer = context->buffer.substr(pos + 1); + + // Skip empty lines + if (line.empty() || line == "\r") + continue; + + // Remove "data: " prefix if present + // if (line.substr(0, 6) == "data: ") + // { + // line = line.substr(6); + // } + + // Skip [DONE] message + std::cout << line << std::endl; + if (line == "data: [DONE]") + { + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = true; + status["status_code"] = 200; + 
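+      // End of stream: hand the caller a final is_done status with an empty
+      // payload, then stop scanning the buffered lines.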
(*context->callback)(std::move(status), Json::Value()); + break; + } + + // Parse the JSON + Json::Value chunk_json; + chunk_json["data"] = line + "\n\n"; + Json::Reader reader; + + Json::Value status; + status["is_done"] = false; + status["has_error"] = false; + status["is_stream"] = true; + status["status_code"] = 200; + (*context->callback)(std::move(status), std::move(chunk_json)); + } + + return size * nmemb; +} + +struct CurlResponse +{ + std::string body; + bool error{false}; + std::string error_message; +}; + +class PythonEngine : public EngineI +{ +private: + // Model configuration + + // Thread-safe model config storage + mutable std::shared_mutex models_mutex_; + std::unordered_map models_; + extensions::TemplateRenderer renderer_; + std::unique_ptr async_file_logger_; + std::unordered_map processMap; + + // Helper functions + CurlResponse MakePostRequest(const std::string &model, const std::string &path, + const std::string &body); + CurlResponse MakeGetRequest(const std::string &model, const std::string &path); + CurlResponse MakeDeleteRequest(const std::string &model, const std::string &path); + + // Process manager functions + pid_t SpawnProcess(const std::string& model, const std::vector& command) ; + bool TerminateProcess(const std::string& model); + + // Internal model management + bool LoadModelConfig(const std::string &model, const std::string &yaml_path); + config::PythonModelConfig *GetModelConfig(const std::string &model); + +public: + PythonEngine(); + ~PythonEngine(); + void RegisterLibraryPath(RegisterLibraryOption opts) override; + + void Load(EngineLoadOption opts) override; + + void Unload(EngineUnloadOption opts) override; + + // Main interface implementations + void GetModels( + std::shared_ptr json_body, + std::function &&callback) override; + + void HandleChatCompletion( + std::shared_ptr json_body, + std::function &&callback) override; + + void LoadModel( + std::shared_ptr json_body, + std::function &&callback) override; + + void UnloadModel( + std::shared_ptr json_body, + std::function &&callback) override; + + void GetModelStatus( + std::shared_ptr json_body, + std::function &&callback) override; + + // Other required virtual functions + void HandleEmbedding( + std::shared_ptr json_body, + std::function &&callback) override; + bool IsSupported(const std::string &feature) override; + bool SetFileLogger(int max_log_lines, const std::string &log_path) override; + void SetLogLevel(trantor::Logger::LogLevel logLevel) override; + void HandleRequest( + std::shared_ptr json_body, + std::function &&callback) override; +}; +} // namespace python_engine \ No newline at end of file diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index 8ce6fa652..1dcf3777f 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -8,7 +8,7 @@ #include #include #include "cortex-common/remote_enginei.h" -#include "extensions/remote-engine/template_renderer.h" +#include "extensions/template_renderer.h" #include "utils/engine_constants.h" #include "utils/file_logger.h" // Helper for CURL response @@ -47,7 +47,7 @@ class RemoteEngine : public RemoteEngineI { // Thread-safe model config storage mutable std::shared_mutex models_mtx_; std::unordered_map models_; - TemplateRenderer renderer_; + extensions::TemplateRenderer renderer_; Json::Value metadata_; std::string api_key_template_; std::unique_ptr async_file_logger_; diff --git 
a/engine/extensions/remote-engine/template_renderer.cc b/engine/extensions/remote-engine/template_renderer.cc deleted file mode 100644 index 15514d17c..000000000 --- a/engine/extensions/remote-engine/template_renderer.cc +++ /dev/null @@ -1,136 +0,0 @@ -#if defined(_WIN32) || defined(_WIN64) -#define NOMINMAX -#undef min -#undef max -#endif -#include "template_renderer.h" -#include -#include -#include "utils/logging_utils.h" -namespace remote_engine { -TemplateRenderer::TemplateRenderer() { - // Configure Inja environment - env_.set_trim_blocks(true); - env_.set_lstrip_blocks(true); - - // Add tojson function for all value types - env_.add_callback("tojson", 1, [](inja::Arguments& args) { - if (args.empty()) { - return nlohmann::json(nullptr); - } - const auto& value = *args[0]; - - if (value.is_string()) { - return nlohmann::json(std::string("\"") + value.get() + - "\""); - } - return value; - }); -} - -std::string TemplateRenderer::Render(const std::string& tmpl, - const Json::Value& data) { - try { - // Convert Json::Value to nlohmann::json - auto json_data = ConvertJsonValue(data); - - // Create the input data structure expected by the template - nlohmann::json template_data; - template_data["input_request"] = json_data; - - // Debug output - LOG_DEBUG << "Template: " << tmpl; - LOG_DEBUG << "Data: " << template_data.dump(2); - - // Render template - std::string result = env_.render(tmpl, template_data); - - // Clean up any potential double quotes in JSON strings - result = std::regex_replace(result, std::regex("\\\"\\\""), "\""); - - LOG_DEBUG << "Result: " << result; - - // Validate JSON - auto parsed = nlohmann::json::parse(result); - - return result; - } catch (const std::exception& e) { - LOG_ERROR << "Template rendering failed: " << e.what(); - LOG_ERROR << "Template: " << tmpl; - throw std::runtime_error(std::string("Template rendering failed: ") + - e.what()); - } -} - -nlohmann::json TemplateRenderer::ConvertJsonValue(const Json::Value& input) { - if (input.isNull()) { - return nullptr; - } else if (input.isBool()) { - return input.asBool(); - } else if (input.isInt()) { - return input.asInt(); - } else if (input.isUInt()) { - return input.asUInt(); - } else if (input.isDouble()) { - return input.asDouble(); - } else if (input.isString()) { - return input.asString(); - } else if (input.isArray()) { - nlohmann::json arr = nlohmann::json::array(); - for (const auto& element : input) { - arr.push_back(ConvertJsonValue(element)); - } - return arr; - } else if (input.isObject()) { - nlohmann::json obj = nlohmann::json::object(); - for (const auto& key : input.getMemberNames()) { - obj[key] = ConvertJsonValue(input[key]); - } - return obj; - } - return nullptr; -} - -Json::Value TemplateRenderer::ConvertNlohmannJson(const nlohmann::json& input) { - if (input.is_null()) { - return Json::Value(); - } else if (input.is_boolean()) { - return Json::Value(input.get()); - } else if (input.is_number_integer()) { - return Json::Value(input.get()); - } else if (input.is_number_unsigned()) { - return Json::Value(input.get()); - } else if (input.is_number_float()) { - return Json::Value(input.get()); - } else if (input.is_string()) { - return Json::Value(input.get()); - } else if (input.is_array()) { - Json::Value arr(Json::arrayValue); - for (const auto& element : input) { - arr.append(ConvertNlohmannJson(element)); - } - return arr; - } else if (input.is_object()) { - Json::Value obj(Json::objectValue); - for (auto it = input.begin(); it != input.end(); ++it) { - obj[it.key()] = 
ConvertNlohmannJson(it.value()); - } - return obj; - } - return Json::Value(); -} - -std::string TemplateRenderer::RenderFile(const std::string& template_path, - const Json::Value& data) { - try { - // Convert Json::Value to nlohmann::json - auto json_data = ConvertJsonValue(data); - - // Load and render template - return env_.render_file(template_path, json_data); - } catch (const std::exception& e) { - throw std::runtime_error(std::string("Template file rendering failed: ") + - e.what()); - } -} -} // namespace remote_engine \ No newline at end of file diff --git a/engine/extensions/remote-engine/template_renderer.h b/engine/extensions/remote-engine/template_renderer.h deleted file mode 100644 index f59e7cc93..000000000 --- a/engine/extensions/remote-engine/template_renderer.h +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -#include - -#include -#include "json/json.h" -#include "trantor/utils/Logger.h" -// clang-format off -#if defined(_WIN32) || defined(_WIN64) -#define NOMINMAX -#undef min -#undef max -#endif -#include -#include -// clang-format on -namespace remote_engine { -class TemplateRenderer { - public: - TemplateRenderer(); - ~TemplateRenderer() = default; - - // Convert Json::Value to nlohmann::json - static nlohmann::json ConvertJsonValue(const Json::Value& input); - - // Convert nlohmann::json to Json::Value - static Json::Value ConvertNlohmannJson(const nlohmann::json& input); - - // Render template with data - std::string Render(const std::string& tmpl, const Json::Value& data); - - // Load template from file and render - std::string RenderFile(const std::string& template_path, - const Json::Value& data); - - private: - inja::Environment env_; -}; - -} // namespace remote_engine \ No newline at end of file From accec0a9db21257d8c4c11c6af8f52e573cf6783 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Wed, 11 Dec 2024 17:02:40 +0700 Subject: [PATCH 08/34] Fix: CI build window --- .../extensions/python-engine/python_engine.h | 269 +++++++++--------- 1 file changed, 134 insertions(+), 135 deletions(-) diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index fbc88b40e..4cbda3999 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -7,154 +7,153 @@ #include #include #include +#include "config/model_config.h" #include "cortex-common/EngineI.h" #include "extensions/template_renderer.h" #include "utils/file_logger.h" -#include "config/model_config.h" #ifdef _WIN32 - #include - #include +#include +#include +using pid_t = DWORD; #elif __APPLE__ || __linux__ - #include - #include - #include - #include - #include +#include +#include +#include +#include +#include #endif // Helper for CURL response -namespace python_engine{ -struct StreamContext -{ - std::shared_ptr> callback; - std::string buffer; +namespace python_engine { +struct StreamContext { + std::shared_ptr> callback; + std::string buffer; }; -static size_t StreamWriteCallback(char *ptr, size_t size, size_t nmemb, - void *userdata) -{ - auto *context = static_cast(userdata); - std::string chunk(ptr, size * nmemb); - - context->buffer += chunk; - - // Process complete lines - size_t pos; - while ((pos = context->buffer.find('\n')) != std::string::npos) - { - std::string line = context->buffer.substr(0, pos); - context->buffer = context->buffer.substr(pos + 1); - - // Skip empty lines - if (line.empty() || line == "\r") - continue; - - // Remove "data: " prefix if present - // if (line.substr(0, 6) == 
"data: ") - // { - // line = line.substr(6); - // } - - // Skip [DONE] message - std::cout << line << std::endl; - if (line == "data: [DONE]") - { - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = true; - status["status_code"] = 200; - (*context->callback)(std::move(status), Json::Value()); - break; - } - - // Parse the JSON - Json::Value chunk_json; - chunk_json["data"] = line + "\n\n"; - Json::Reader reader; - - Json::Value status; - status["is_done"] = false; - status["has_error"] = false; - status["is_stream"] = true; - status["status_code"] = 200; - (*context->callback)(std::move(status), std::move(chunk_json)); +static size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, + void* userdata) { + auto* context = static_cast(userdata); + std::string chunk(ptr, size * nmemb); + + context->buffer += chunk; + + // Process complete lines + size_t pos; + while ((pos = context->buffer.find('\n')) != std::string::npos) { + std::string line = context->buffer.substr(0, pos); + context->buffer = context->buffer.substr(pos + 1); + + // Skip empty lines + if (line.empty() || line == "\r") + continue; + + // Remove "data: " prefix if present + // if (line.substr(0, 6) == "data: ") + // { + // line = line.substr(6); + // } + + // Skip [DONE] message + std::cout << line << std::endl; + if (line == "data: [DONE]") { + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = true; + status["status_code"] = 200; + (*context->callback)(std::move(status), Json::Value()); + break; } - return size * nmemb; + // Parse the JSON + Json::Value chunk_json; + chunk_json["data"] = line + "\n\n"; + Json::Reader reader; + + Json::Value status; + status["is_done"] = false; + status["has_error"] = false; + status["is_stream"] = true; + status["status_code"] = 200; + (*context->callback)(std::move(status), std::move(chunk_json)); + } + + return size * nmemb; } -struct CurlResponse -{ - std::string body; - bool error{false}; - std::string error_message; +struct CurlResponse { + std::string body; + bool error{false}; + std::string error_message; }; -class PythonEngine : public EngineI -{ -private: - // Model configuration - - // Thread-safe model config storage - mutable std::shared_mutex models_mutex_; - std::unordered_map models_; - extensions::TemplateRenderer renderer_; - std::unique_ptr async_file_logger_; - std::unordered_map processMap; - - // Helper functions - CurlResponse MakePostRequest(const std::string &model, const std::string &path, - const std::string &body); - CurlResponse MakeGetRequest(const std::string &model, const std::string &path); - CurlResponse MakeDeleteRequest(const std::string &model, const std::string &path); - - // Process manager functions - pid_t SpawnProcess(const std::string& model, const std::vector& command) ; - bool TerminateProcess(const std::string& model); - - // Internal model management - bool LoadModelConfig(const std::string &model, const std::string &yaml_path); - config::PythonModelConfig *GetModelConfig(const std::string &model); - -public: - PythonEngine(); - ~PythonEngine(); - void RegisterLibraryPath(RegisterLibraryOption opts) override; - - void Load(EngineLoadOption opts) override; - - void Unload(EngineUnloadOption opts) override; - - // Main interface implementations - void GetModels( - std::shared_ptr json_body, - std::function &&callback) override; - - void HandleChatCompletion( - std::shared_ptr json_body, - std::function &&callback) override; - - void 
LoadModel( - std::shared_ptr json_body, - std::function &&callback) override; - - void UnloadModel( - std::shared_ptr json_body, - std::function &&callback) override; - - void GetModelStatus( - std::shared_ptr json_body, - std::function &&callback) override; - - // Other required virtual functions - void HandleEmbedding( - std::shared_ptr json_body, - std::function &&callback) override; - bool IsSupported(const std::string &feature) override; - bool SetFileLogger(int max_log_lines, const std::string &log_path) override; - void SetLogLevel(trantor::Logger::LogLevel logLevel) override; - void HandleRequest( - std::shared_ptr json_body, - std::function &&callback) override; +class PythonEngine : public EngineI { + private: + // Model configuration + + // Thread-safe model config storage + mutable std::shared_mutex models_mutex_; + std::unordered_map models_; + extensions::TemplateRenderer renderer_; + std::unique_ptr async_file_logger_; + std::unordered_map processMap; + + // Helper functions + CurlResponse MakePostRequest(const std::string& model, + const std::string& path, + const std::string& body); + CurlResponse MakeGetRequest(const std::string& model, + const std::string& path); + CurlResponse MakeDeleteRequest(const std::string& model, + const std::string& path); + + // Process manager functions + pid_t SpawnProcess(const std::string& model, + const std::vector& command); + bool TerminateProcess(const std::string& model); + + // Internal model management + bool LoadModelConfig(const std::string& model, const std::string& yaml_path); + config::PythonModelConfig* GetModelConfig(const std::string& model); + + public: + PythonEngine(); + ~PythonEngine(); + void RegisterLibraryPath(RegisterLibraryOption opts) override; + + void Load(EngineLoadOption opts) override; + + void Unload(EngineUnloadOption opts) override; + + // Main interface implementations + void GetModels( + std::shared_ptr json_body, + std::function&& callback) override; + + void HandleChatCompletion( + std::shared_ptr json_body, + std::function&& callback) override; + + void LoadModel( + std::shared_ptr json_body, + std::function&& callback) override; + + void UnloadModel( + std::shared_ptr json_body, + std::function&& callback) override; + + void GetModelStatus( + std::shared_ptr json_body, + std::function&& callback) override; + + // Other required virtual functions + void HandleEmbedding( + std::shared_ptr json_body, + std::function&& callback) override; + bool IsSupported(const std::string& feature) override; + bool SetFileLogger(int max_log_lines, const std::string& log_path) override; + void SetLogLevel(trantor::Logger::LogLevel logLevel) override; + void HandleRequest( + std::shared_ptr json_body, + std::function&& callback) override; }; -} // namespace python_engine \ No newline at end of file +} // namespace python_engine \ No newline at end of file From 6a8bebf227ebb0e1dde259fc47586a09b5b5f37b Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Wed, 11 Dec 2024 17:10:05 +0700 Subject: [PATCH 09/34] Fix: CI build window --- engine/extensions/python-engine/python_engine.cc | 4 ++-- engine/extensions/python-engine/python_engine.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index 83f85126c..6ab8c0e48 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -135,7 +135,7 @@ pid_t PythonEngine::SpawnProcess(const std::string& model, 
return -1; } } -bool PythonEngine::TerminateProcess(const std::string& model) { +bool PythonEngine::TerminateModelProcess(const std::string& model) { auto it = processMap.find(model); if (it == processMap.end()) { LOG_ERROR << "No process found for model: " << model @@ -479,7 +479,7 @@ void PythonEngine::UnloadModel( { std::unique_lock lock(models_mutex_); - if (TerminateProcess(model)) { + if (TerminateModelProcess(model)) { models_.erase(model); } else { Json::Value error; diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index 4cbda3999..1b0a88d1a 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -109,7 +109,7 @@ class PythonEngine : public EngineI { // Process manager functions pid_t SpawnProcess(const std::string& model, const std::vector& command); - bool TerminateProcess(const std::string& model); + bool TerminateModelProcess(const std::string& model); // Internal model management bool LoadModelConfig(const std::string& model, const std::string& yaml_path); From 36f29bfeb66b2ac350911eb28c941b2b32ee4d66 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 12 Dec 2024 16:15:24 +0700 Subject: [PATCH 10/34] feat: support download python model from cortexso --- engine/config/model_config.h | 23 ++++++++++++++--------- engine/services/download_service.cc | 3 +++ engine/services/model_service.cc | 24 +++++++++++++++++------- engine/utils/curl_utils.cc | 25 +++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 16 deletions(-) diff --git a/engine/config/model_config.h b/engine/config/model_config.h index 78f62d9ca..55218fca7 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -547,7 +547,7 @@ struct PythonModelConfig { // Model Load Parameters std::string port; - std::string files; + std::string model_location; std::string script; std::string log_path; std::string log_level; @@ -557,7 +557,7 @@ struct PythonModelConfig { Json::Value extra_params; // Accept dynamic extra parameters // Method to convert C++ struct to YAML - std::string ToYaml() const { + void ToYaml(const std::string & filepath) const { YAML::Emitter out; out << YAML::BeginMap; @@ -597,7 +597,7 @@ struct PythonModelConfig { // Model Load Parameters out << YAML::Key << "port" << YAML::Value << port; - out << YAML::Key << "files" << YAML::Value << files; + out << YAML::Key << "model_location" << YAML::Value << model_location; out << YAML::Key << "script" << YAML::Value << script; out << YAML::Key << "log_path" << YAML::Value << log_path; out << YAML::Key << "log_level" << YAML::Value << log_level; @@ -620,7 +620,12 @@ struct PythonModelConfig { << iter->asString(); } out << YAML::EndMap; - return out.c_str(); + + std::ofstream fout(filepath); + if (!fout.is_open()) { + throw std::runtime_error("Failed to open file for writing: " + filepath); + } + fout << out.c_str(); } // Method to populate struct from YAML file @@ -669,8 +674,8 @@ struct PythonModelConfig { auto mlp = config; if (mlp["port"]) port = mlp["port"].as(); - if (mlp["files"]) - files = mlp["files"].as(); + if (mlp["model_location"]) + model_location = mlp["model_location"].as(); if (mlp["script"]) script = mlp["script"].as(); if (mlp["log_path"]) @@ -730,7 +735,7 @@ struct PythonModelConfig { root["log_path"] = log_path; root["log_level"] = log_level; root["environment"] = environment; - root["files"] = files; + root["model_location"] = model_location; root["script"] = script; // Serialize command 
as JSON array @@ -806,8 +811,8 @@ struct PythonModelConfig { environment = mlp["environment"].asString(); if (mlp.isMember("engine")) engine = mlp["engine"].asString(); - if (mlp.isMember("files")) - files = mlp["files"].asString(); + if (mlp.isMember("model_location")) + model_location = mlp["model_location"].asString(); if (mlp.isMember("script")) script = mlp["script"].asString(); diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc index d855c8f61..9c7137168 100644 --- a/engine/services/download_service.cc +++ b/engine/services/download_service.cc @@ -374,6 +374,9 @@ void DownloadService::ProcessTask(DownloadTask& task, int worker_id) { CTL_ERR("Failed to init curl!"); return; } + if (!std::filesystem::exists(item.localPath.parent_path())) { + std::filesystem::create_directories(item.localPath.parent_path()); + } auto file = fopen(item.localPath.string().c_str(), "wb"); if (!file) { CTL_ERR("Failed to open output file " + item.localPath.string()); diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 7f79ddaf7..ab20734dc 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -96,6 +96,7 @@ cpp::result GetDownloadTask( file_manager_utils::CreateDirectoryRecursively(model_container_path.string()); for (const auto& value : result.value()) { + // std::cout << "value object: " << value.toStyledString() << std::endl; auto path = value["path"].asString(); if (path == ".gitattributes" || path == ".gitignore" || path == "README.md") { @@ -517,15 +518,24 @@ ModelService::DownloadModelFromCortexsoAsync( config::YamlHandler yaml_handler; yaml_handler.ModelConfigFromFile(model_yml_item->localPath.string()); auto mc = yaml_handler.GetModelConfig(); - mc.model = unique_model_id; + if (mc.engine == kPythonEngine) { // process for Python engine + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml(model_yml_item->localPath.string()); + python_model_config.model_location = + model_yml_item->localPath.parent_path().string(); + python_model_config.ToYaml(model_yml_item->localPath.string()); - uint64_t model_size = 0; - for (const auto& item : finishedTask.items) { - model_size = model_size + item.bytes.value_or(0); + } else { + mc.model = unique_model_id; + + uint64_t model_size = 0; + for (const auto& item : finishedTask.items) { + model_size = model_size + item.bytes.value_or(0); + } + mc.size = model_size; + yaml_handler.UpdateModelConfig(mc); + yaml_handler.WriteYamlFile(model_yml_item->localPath.string()); } - mc.size = model_size; - yaml_handler.UpdateModelConfig(mc); - yaml_handler.WriteYamlFile(model_yml_item->localPath.string()); auto rel = file_manager_utils::ToRelativeCortexDataPath(model_yml_item->localPath); diff --git a/engine/utils/curl_utils.cc b/engine/utils/curl_utils.cc index 71f263a6a..9ba0f5a76 100644 --- a/engine/utils/curl_utils.cc +++ b/engine/utils/curl_utils.cc @@ -257,6 +257,31 @@ cpp::result SimpleGetJson(const std::string& url, " parsing error: " + reader.getFormattedErrorMessages()); } + if (root.isArray()) { + for (const auto& value : root) { + if (value["type"].asString() == "directory") { + auto temp = + SimpleGetJson(url + "/" + value["path"].asString(), timeout); + if (!temp.has_error()) { + if (temp.value().isArray()) { + for (const auto& item : temp.value()) { + root.append(item); + } + } else { + root.append(temp.value()); + } + } + } + } + for (Json::ArrayIndex i = 0; i < root.size();) { + if (root[i].isMember("type") && root[i]["type"] == 
"directory") { + root.removeIndex(i, nullptr); + } else { + ++i; + } + } + + } return root; } From 10d53a13079ab74e078e46530ef005c6be1fd04d Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 12 Dec 2024 17:25:32 +0700 Subject: [PATCH 11/34] feat: add inference interface --- engine/cortex-common/EngineI.h | 3 + .../extensions/python-engine/python_engine.cc | 151 ++++++++++++++++++ .../extensions/python-engine/python_engine.h | 3 + 3 files changed, 157 insertions(+) diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index 466d32c79..ebc0674e1 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -72,6 +72,9 @@ class EngineI { virtual void HandleRequest( std::shared_ptr json_body, std::function&& callback) = 0; + virtual void HandleInference( + std::shared_ptr json_body, + std::function &&callback) = 0; // Stop inflight chat completion in stream mode virtual void StopInferencing(const std::string& model_id) = 0; diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index 6ab8c0e48..d61894543 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -509,6 +509,157 @@ void PythonEngine::HandleChatCompletion( std::shared_ptr json_body, std::function&& callback) {} +void PythonEngine::HandleInference( + std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("model")) { + Json::Value error; + error["error"] = "Missing required field: model is required!"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + std::string method = "post"; + std::string path = "/inference"; + std::string transform_request = + (*json_body).get("transform_request", "").asString(); + std::string transform_response = + (*json_body).get("transform_response", "").asString(); + std::string model = (*json_body)["model"].asString(); + Json::Value body = (*json_body)["body"]; + + // Transform Request + std::string transformed_request; + if (!transform_request.empty()) { + + try { + // Validate JSON body + if (!body || body.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } + + // Render with error handling + try { + transformed_request = renderer_.Render(transform_request, *json_body); + } catch (const std::exception& e) { + throw std::runtime_error("Template rendering error: " + + std::string(e.what())); + } + } catch (const std::exception& e) { + // Log error and potentially rethrow or handle accordingly + LOG_WARN << "Error in TransformRequest: " << e.what(); + LOG_WARN << "Using original request body"; + transformed_request = body.toStyledString(); + } + } else { + transformed_request = body.toStyledString(); + } + + // End Transform request + + CurlResponse response; + if (method == "post") { + response = MakePostRequest(model, path, transformed_request); + } else if (method == "get") { + response = MakeGetRequest(model, path); + } else if (method == "delete") { + response = MakeDeleteRequest(model, path); + } else { + Json::Value error; + error["error"] = + "method not supported! 
Supported methods are: post, get, delete"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + callback(std::move(status), std::move(error)); + return; + } + + if (response.error) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k400BadRequest; + Json::Value error; + error["error"] = response.error_message; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value response_json; + Json::Reader reader; + if (!reader.parse(response.body, response_json)) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + Json::Value error; + error["error"] = "Failed to parse response"; + callback(std::move(status), std::move(error)); + return; + } + + if (!transform_response.empty()) { + // Transform Response + std::string response_str; + try { + // Validate JSON body + if (!response_json || response_json.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } + // Render with error handling + try { + response_str = renderer_.Render(transform_response, response_json); + } catch (const std::exception& e) { + throw std::runtime_error("Template rendering error: " + + std::string(e.what())); + } + } catch (const std::exception& e) { + // Log error and potentially rethrow or handle accordingly + LOG_WARN << "Error in TransformRequest: " << e.what(); + LOG_WARN << "Using original request body"; + response_str = response_json.toStyledString(); + } + + Json::Reader reader_final; + Json::Value response_json_final; + if (!reader_final.parse(response_str, response_json_final)) { + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + Json::Value error; + error["error"] = "Failed to parse response"; + callback(std::move(status), std::move(error)); + return; + } + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json_final)); + } else { + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = k200OK; + + callback(std::move(status), std::move(response_json)); + } +} void PythonEngine::HandleRequest( std::shared_ptr json_body, std::function&& callback) { diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index 1b0a88d1a..f7fff434d 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -155,5 +155,8 @@ class PythonEngine : public EngineI { void HandleRequest( std::shared_ptr json_body, std::function&& callback) override; + virtual void HandleInference( + std::shared_ptr json_body, + std::function &&callback) = 0; }; } // namespace python_engine \ No newline at end of file From 389dd8818090d75483e4aed41aa2146db7a35747 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Sat, 14 Dec 2024 20:01:56 +0700 Subject: [PATCH 12/34] feat: integrate to cortex cpp --- engine/CMakeLists.txt | 2 +- engine/common/base.h | 13 +++++ engine/config/model_config.h | 13 +---- engine/controllers/models.cc | 33 ++++++++++- engine/controllers/server.cc | 
50 ++++++++++++++++ engine/controllers/server.h | 12 +++- engine/cortex-common/EngineI.h | 7 +-- .../extensions/python-engine/python_engine.cc | 6 +- .../extensions/python-engine/python_engine.h | 10 ++-- engine/services/engine_service.cc | 17 ++++-- engine/services/inference_service.cc | 58 +++++++++++++++++++ engine/services/inference_service.h | 8 ++- engine/services/model_service.cc | 52 +++++++++++++---- 13 files changed, 242 insertions(+), 39 deletions(-) diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index db34a8346..01d9571a8 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -144,7 +144,7 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/template_renderer.cc - + ${CMAKE_CURRENT_SOURCE_DIR}/extensions/python-engine/python_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/openai_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/anthropic_engine.cc diff --git a/engine/common/base.h b/engine/common/base.h index 478cc7feb..c572a1823 100644 --- a/engine/common/base.h +++ b/engine/common/base.h @@ -46,3 +46,16 @@ class BaseEmbedding { // The derived class can also override other methods if needed }; + +class BasePythonModel { + public: + virtual ~BasePythonModel() {} + + // Model management + virtual void Inference( + const HttpRequestPtr& req, + std::function&& callback) = 0; + virtual void RouteRequest( + const HttpRequestPtr& req, + std::function&& callback) = 0; +}; \ No newline at end of file diff --git a/engine/config/model_config.h b/engine/config/model_config.h index 55218fca7..ccf3e2ec0 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -750,17 +750,8 @@ struct PythonModelConfig { } // Method to populate struct from JSON - void FromJson(const std::string& jsonString) { - Json::CharReaderBuilder reader; - Json::Value root; - std::string errs; - std::istringstream s(jsonString); - - if (!Json::parseFromStream(reader, s, &root, &errs)) { - std::cerr << "Error parsing JSON: " << errs << std::endl; - return; - } - + void FromJson(const Json::Value& root) { + if (root.isMember("id")) id = root["id"].asString(); if (root.isMember("model")) diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index affa45d52..23a50f1d1 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -211,6 +211,16 @@ void Models::ListModel( } data.append(std::move(obj)); yaml_handler.Reset(); + } else if (model_config.engine == kPythonEngine) { + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.path_to_model_yaml)) + .string()); + Json::Value obj = python_model_config.ToJson(); + obj["id"] = model_entry.model; + obj["model"] = model_entry.model; + data.append(std::move(obj)); } else { config::RemoteModelConfig remote_model_config; remote_model_config.LoadFromYamlFile( @@ -282,7 +292,8 @@ void Models::GetModel(const HttpRequestPtr& req, auto resp = cortex_utils::CreateCortexHttpTextAsJsonResponse(ret); resp->setStatusCode(drogon::k200OK); callback(resp); - } else { + } else if (model_config.engine == kOpenAiEngine || + model_config.engine == kAnthropicEngine) { config::RemoteModelConfig remote_model_config; remote_model_config.LoadFromYamlFile( fmu::ToAbsoluteCortexDataPath( @@ -295,6 +306,19 @@ void Models::GetModel(const HttpRequestPtr& req, auto resp = 
cortex_utils::CreateCortexHttpJsonResponse(ret); resp->setStatusCode(k200OK); callback(resp); + } else { + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.value().path_to_model_yaml)) + .string()); + ret = python_model_config.ToJson(); + ret["id"] = python_model_config.model; + ret["object"] = "model"; + ret["result"] = "OK"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); } } catch (const std::exception& e) { @@ -353,6 +377,13 @@ void Models::UpdateModel(const HttpRequestPtr& req, yaml_handler.WriteYamlFile(yaml_fp.string()); message = "Successfully update model ID '" + model_id + "': " + json_body.toStyledString(); + } else if (model_config.engine == kPythonEngine) { + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml(yaml_fp.string()); + python_model_config.FromJson(json_body); + python_model_config.ToYaml(yaml_fp.string()); + message = "Successfully update model ID '" + model_id + + "': " + json_body.toStyledString(); } else { config::RemoteModelConfig remote_model_config; remote_model_config.LoadFromYamlFile(yaml_fp.string()); diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index a9920e8aa..67133041d 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -120,6 +120,56 @@ void server::FineTuning( LOG_TRACE << "Done fine-tuning"; } +void server::Inference(const HttpRequestPtr& req, + std::function&& callback) { + LOG_TRACE << "Start inference"; + auto q = std::make_shared(); + auto ir = inference_svc_->HandleInference(q, req->getJsonObject()); + LOG_DEBUG << "request: " << req->getJsonObject()->toStyledString(); + if (ir.has_error()) { + auto err = ir.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(err)); + resp->setStatusCode( + static_cast(std::get<0>(err)["status_code"].asInt())); + callback(resp); + return; + } + LOG_TRACE << "Wait to inference"; + auto [status, res] = q->wait_and_pop(); + LOG_DEBUG << "response: " << res.toStyledString(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode( + static_cast(status["status_code"].asInt())); + callback(resp); + LOG_TRACE << "Done inference"; +} + +void server::RouteRequest( + const HttpRequestPtr& req, + std::function&& callback) { + + LOG_TRACE << "Start route request"; + auto q = std::make_shared(); + auto ir = inference_svc_->HandleRouteRequest(q, req->getJsonObject()); + LOG_DEBUG << "request: " << req->getJsonObject()->toStyledString(); + if (ir.has_error()) { + auto err = ir.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(err)); + resp->setStatusCode( + static_cast(std::get<0>(err)["status_code"].asInt())); + callback(resp); + return; + } + LOG_TRACE << "Wait to route request"; + auto [status, res] = q->wait_and_pop(); + LOG_DEBUG << "response: " << res.toStyledString(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode( + static_cast(status["status_code"].asInt())); + callback(resp); + LOG_TRACE << "Done route request"; +} + void server::LoadModel(const HttpRequestPtr& req, std::function&& callback) { auto ir = inference_svc_->LoadModel(req->getJsonObject()); diff --git a/engine/controllers/server.h b/engine/controllers/server.h index 22ea86c30..b6b125f97 100644 --- a/engine/controllers/server.h +++ b/engine/controllers/server.h @@ -25,7 +25,8 @@ namespace inferences { 
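// Context for the hunk below: the server controller now also derives from
// BasePythonModel (declared in common/base.h earlier in this patch), adding
// two routes next to the existing chat-completion and embedding ones:
//   POST /v1/inference     -> server::Inference    -> InferenceService::HandleInference
//   POST /v1/route/request -> server::RouteRequest -> InferenceService::HandleRouteRequest
// Both handlers block on a shared queue (wait_and_pop) until the engine's
// callback pushes a (status, result) pair, then map status["status_code"]
// onto the HTTP response code.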
class server : public drogon::HttpController, public BaseModel, public BaseChatCompletion, - public BaseEmbedding { + public BaseEmbedding, + public BasePythonModel { public: server(std::shared_ptr inference_service, std::shared_ptr engine_service); @@ -46,8 +47,11 @@ class server : public drogon::HttpController, ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Options, Post); ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Options, Post); ADD_METHOD_TO(server::Embedding, "/v1/embeddings", Options, Post); + ADD_METHOD_TO(server::Inference, "/v1/inference", Options, Post); + ADD_METHOD_TO(server::RouteRequest, "/v1/route/request", Options, Post); METHOD_LIST_END + void ChatCompletion( const HttpRequestPtr& req, std::function&& callback) override; @@ -69,6 +73,12 @@ class server : public drogon::HttpController, void FineTuning( const HttpRequestPtr& req, std::function&& callback) override; + void Inference( + const HttpRequestPtr& req, + std::function&& callback) override; + void RouteRequest( + const HttpRequestPtr& req, + std::function&& callback) override; private: void ProcessStreamRes(std::function cb, diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index ebc0674e1..a9a7a2926 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -69,14 +69,13 @@ class EngineI { virtual void SetLogLevel(trantor::Logger::LogLevel logLevel) = 0; virtual Json::Value GetRemoteModels() = 0; - virtual void HandleRequest( + virtual void HandleRouteRequest( std::shared_ptr json_body, std::function&& callback) = 0; virtual void HandleInference( - std::shared_ptr json_body, - std::function &&callback) = 0; + std::shared_ptr json_body, + std::function&& callback) = 0; // Stop inflight chat completion in stream mode virtual void StopInferencing(const std::string& model_id) = 0; - }; diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index d61894543..2abcb3a2c 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -660,7 +660,11 @@ void PythonEngine::HandleInference( callback(std::move(status), std::move(response_json)); } } -void PythonEngine::HandleRequest( +Json::Value PythonEngine::GetRemoteModels() { + return Json::Value(); +} +void PythonEngine::StopInferencing(const std::string& model_id) {} +void PythonEngine::HandleRouteRequest( std::shared_ptr json_body, std::function&& callback) { if (!json_body->isMember("model") || !json_body->isMember("method") || diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index f7fff434d..50e58f62b 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -152,11 +152,13 @@ class PythonEngine : public EngineI { bool IsSupported(const std::string& feature) override; bool SetFileLogger(int max_log_lines, const std::string& log_path) override; void SetLogLevel(trantor::Logger::LogLevel logLevel) override; - void HandleRequest( + void HandleRouteRequest( std::shared_ptr json_body, std::function&& callback) override; - virtual void HandleInference( - std::shared_ptr json_body, - std::function &&callback) = 0; + void HandleInference( + std::shared_ptr json_body, + std::function&& callback) override; + Json::Value GetRemoteModels() override; + void StopInferencing(const std::string& model_id) override; }; } // namespace python_engine \ No newline at end of file 
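Taken together, patches 11 and 12 define the Python-engine request path: `POST /v1/inference` reaches `PythonEngine::HandleInference`, which requires a `model` field, forwards the JSON under `body` to the model's local server at `/inference`, and can rewrite the request and response through the optional `transform_request` / `transform_response` templates rendered by `extensions::TemplateRenderer`. Below is a minimal client-side sketch of such a payload using jsoncpp; the engine string, model id, input field, and template are illustrative assumptions rather than values fixed by these patches.

```cpp
#include <json/json.h>  // jsoncpp, already a dependency of the engine code

// Hypothetical helper: builds a /v1/inference payload. The key names mirror
// what PythonEngine::HandleInference reads; every literal value is a placeholder.
Json::Value BuildInferenceRequest() {
  Json::Value req;
  req["engine"] = "python-engine";  // assumed string value of kPythonEngine
  req["model"] = "whispervq";       // a model previously started for this engine
  req["body"]["input"] = "hello";   // forwarded to the model process at POST /inference
  // Optional: a template applied to the request before it is sent; the exact
  // syntax is whatever extensions::TemplateRenderer::Render accepts.
  req["transform_request"] = "{{ body.input }}";
  return req;
}
```

On success the handler replies with `status_code` 200 and either the raw model response or, when `transform_response` is supplied, the re-rendered response body; a missing `model` field or an unreachable model process comes back as a 400 with an `error` message.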
diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index a4bccb66f..7731e0f6c 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -7,6 +7,7 @@ #include #include "algorithm" #include "database/engines.h" +#include "extensions/python-engine/python_engine.h" #include "extensions/remote-engine/anthropic_engine.h" #include "extensions/remote-engine/openai_engine.h" #include "utils/archive_utils.h" @@ -198,7 +199,6 @@ cpp::result EngineService::UninstallEngineVariant( return cpp::result(true); } - if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -533,7 +533,6 @@ EngineService::SetDefaultEngineVariant(const std::string& engine, " is not installed yet!"); } - std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -677,7 +676,6 @@ cpp::result EngineService::GetLoadedEngine( return engines_[ne].engine; } - cpp::result EngineService::LoadEngine( const std::string& engine_name) { auto ne = NormalizeEngine(engine_name); @@ -687,6 +685,13 @@ cpp::result EngineService::LoadEngine( return {}; } + // Check for python engine + + if (engine_name == kPythonEngine) { + engines_[engine_name].engine = new python_engine::PythonEngine(); + CTL_INF("Loaded engine: " << engine_name); + return {}; + } // Check for remote engine if (remote_engine::IsRemoteEngine(engine_name)) { @@ -709,7 +714,6 @@ cpp::result EngineService::LoadEngine( CTL_INF("Loading engine: " << ne); - auto engine_dir_path_res = GetEngineDirPath(ne); if (engine_dir_path_res.has_error()) { return cpp::fail(engine_dir_path_res.error()); @@ -888,6 +892,10 @@ cpp::result EngineService::IsEngineReady( } // End hard code + // Check for python engine + if (engine == kPythonEngine) { + return true; + } auto os = hw_inf_.sys_inf->os; if (os == kMacOs && (ne == kOnnxRepo || ne == kTrtLlmRepo)) { @@ -918,7 +926,6 @@ cpp::result EngineService::UpdateEngine( CTL_INF("Default variant: " << default_variant->variant << ", version: " + default_variant->version); - std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc index 91cb277dc..85dbd215f 100644 --- a/engine/services/inference_service.cc +++ b/engine/services/inference_service.cc @@ -73,6 +73,64 @@ cpp::result InferenceService::HandleEmbedding( return {}; } +cpp::result InferenceService::HandleInference( + std::shared_ptr q, std::shared_ptr json_body) { + std::string engine_type; + if (!HasFieldInReq(json_body, "engine")) { + engine_type = kLlamaRepo; + } else { + engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); + } + + auto engine_result = engine_service_->GetLoadedEngine(engine_type); + if (engine_result.has_error()) { + Json::Value res; + Json::Value stt; + res["message"] = "Engine is not loaded yet"; + stt["status_code"] = drogon::k400BadRequest; + LOG_WARN << "Engine is not loaded yet"; + return cpp::fail(std::make_pair(stt, res)); + } + + auto cb = [q](Json::Value status, Json::Value res) { + q->push(std::make_pair(status, res)); + }; + if (std::holds_alternative(engine_result.value())) { + std::get(engine_result.value()) + ->HandleInference(json_body, std::move(cb)); + } + return {}; +} + +cpp::result 
InferenceService::HandleRouteRequest( + std::shared_ptr q, std::shared_ptr json_body) { + std::string engine_type; + if (!HasFieldInReq(json_body, "engine")) { + engine_type = kLlamaRepo; + } else { + engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); + } + + auto engine_result = engine_service_->GetLoadedEngine(engine_type); + if (engine_result.has_error()) { + Json::Value res; + Json::Value stt; + res["message"] = "Engine is not loaded yet"; + stt["status_code"] = drogon::k400BadRequest; + LOG_WARN << "Engine is not loaded yet"; + return cpp::fail(std::make_pair(stt, res)); + } + + auto cb = [q](Json::Value status, Json::Value res) { + q->push(std::make_pair(status, res)); + }; + if (std::holds_alternative(engine_result.value())) { + std::get(engine_result.value()) + ->HandleRouteRequest(json_body, std::move(cb)); + } + return {}; +} + InferResult InferenceService::LoadModel( std::shared_ptr json_body) { std::string engine_type; diff --git a/engine/services/inference_service.h b/engine/services/inference_service.h index b417fa14a..61d88fee2 100644 --- a/engine/services/inference_service.h +++ b/engine/services/inference_service.h @@ -3,9 +3,9 @@ #include #include #include +#include "extensions/remote-engine/remote_engine.h" #include "services/engine_service.h" #include "utils/result.hpp" -#include "extensions/remote-engine/remote_engine.h" namespace services { // Status and result using InferResult = std::pair; @@ -41,6 +41,12 @@ class InferenceService { cpp::result HandleEmbedding( std::shared_ptr q, std::shared_ptr json_body); + cpp::result HandleInference( + std::shared_ptr q, std::shared_ptr json_body); + + cpp::result HandleRouteRequest( + std::shared_ptr q, std::shared_ptr json_body); + InferResult LoadModel(std::shared_ptr json_body); InferResult UnloadModel(const std::string& engine, diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 21e3dafaa..cb8a0b1ab 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -769,18 +769,48 @@ cpp::result ModelService::StartModel( constexpr const int kDefautlContextLength = 8192; int max_model_context_length = kDefautlContextLength; Json::Value json_data; - // Currently we don't support download vision models, so we need to bypass check - if (!params_override.bypass_model_check()) { - auto model_entry = modellist_handler.GetModelInfo(model_handle); - if (model_entry.has_error()) { - CTL_WRN("Error: " + model_entry.error()); - return cpp::fail(model_entry.error()); - } - yaml_handler.ModelConfigFromFile( + auto model_entry = modellist_handler.GetModelInfo(model_handle); + if (model_entry.has_error()) { + CTL_WRN("Error: " + model_entry.error()); + return cpp::fail(model_entry.error()); + } + yaml_handler.ModelConfigFromFile( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.value().path_to_model_yaml)) + .string()); + auto mc = yaml_handler.GetModelConfig(); + + // Check if Python model first + if (mc.engine == kPythonEngine) { + json_data["model"] = model_handle; + json_data["model_path"] = fmu::ToAbsoluteCortexDataPath( fs::path(model_entry.value().path_to_model_yaml)) - .string()); - auto mc = yaml_handler.GetModelConfig(); + .string(); + json_data["engine"] = mc.engine; + assert(!!inference_svc_); + // Check if python engine + + auto ir = + inference_svc_->LoadModel(std::make_shared(json_data)); + auto status = std::get<0>(ir)["status_code"].asInt(); + auto data = std::get<1>(ir); + + if (status == drogon::k200OK) { + return StartModelResult{.success = 
true, .warning = ""};
+    } else if (status == drogon::k409Conflict) {
+      CTL_INF("Model '" + model_handle + "' is already loaded");
+      return StartModelResult{.success = true, .warning = ""};
+    } else {
+      // only report to user the error
+      CTL_ERR("Model failed to start with status code: " << status);
+      return cpp::fail("Model failed to start: " +
+                       data["message"].asString());
+    }
+  }
+
+  // Currently we don't support download vision models, so we need to bypass check
   if (!params_override.bypass_model_check()) {
     // Running remote model
     if (remote_engine::IsRemoteEngine(mc.engine)) {
@@ -881,6 +911,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
   }
 
   assert(!!inference_svc_);
+  // Check if python engine
+
   auto ir =
       inference_svc_->LoadModel(std::make_shared<Json::Value>(json_data));
   auto status = std::get<0>(ir)["status_code"].asInt();

From e6324c280b7ce867116da9016af7ea8b94da29f4 Mon Sep 17 00:00:00 2001
From: nguyenhoangthuan99
Date: Mon, 16 Dec 2024 09:50:05 +0700
Subject: [PATCH 13/34] fix: remove python engine load engine option

---
 engine/extensions/python-engine/python_engine.cc | 4 ----
 engine/extensions/python-engine/python_engine.h  | 1 -
 2 files changed, 5 deletions(-)

diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc
index 2abcb3a2c..ea980e870 100644
--- a/engine/extensions/python-engine/python_engine.cc
+++ b/engine/extensions/python-engine/python_engine.cc
@@ -905,10 +905,6 @@ void PythonEngine::SetLogLevel(trantor::Logger::LogLevel log_level) {
   trantor::Logger::setLogLevel(log_level);
 }
 
-void PythonEngine::RegisterLibraryPath(RegisterLibraryOption opts) {
-
-};
-
 void PythonEngine::Load(EngineLoadOption opts) {
   // Develop register model here on loading engine
 };

diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h
index 50e58f62b..e404f2e19 100644
--- a/engine/extensions/python-engine/python_engine.h
+++ b/engine/extensions/python-engine/python_engine.h
@@ -118,7 +118,6 @@ class PythonEngine : public EngineI {
  public:
   PythonEngine();
   ~PythonEngine();
-  void RegisterLibraryPath(RegisterLibraryOption opts) override;
 
   void Load(EngineLoadOption opts) override;
 
From 3838a36e010ce036bc59c684c66e71820e8ca849 Mon Sep 17 00:00:00 2001
From: nguyenhoangthuan99
Date: Mon, 16 Dec 2024 23:58:40 +0700
Subject: [PATCH 14/34] feat: init environment interface

---
 engine/common/download_task.h                 |  13 +-
 .../extensions/python-engine/python_engine.cc |   1 +
 engine/services/environment_serrvice.cc       |   0
 engine/services/environment_service.h         |  51 ++++++
 engine/utils/curl_utils.cc                    |  21 ++-
 engine/utils/curl_utils.h                     |   5 +-
 engine/utils/environment_utils.h              | 168 ++++++++++++++++++
 7 files changed, 245 insertions(+), 14 deletions(-)
 create mode 100644 engine/services/environment_serrvice.cc
 create mode 100644 engine/services/environment_service.h
 create mode 100644 engine/utils/environment_utils.h

diff --git a/engine/common/download_task.h b/engine/common/download_task.h
index 95e736394..53f1902c5 100644
--- a/engine/common/download_task.h
+++ b/engine/common/download_task.h
@@ -6,7 +6,14 @@
 #include
 #include
 
-enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit, Cortex };
+enum class DownloadType {
+  Model,
+  Engine,
+  Miscellaneous,
+  CudaToolkit,
+  Cortex,
+  Environments
+};
 
 struct DownloadItem {
@@ -48,6 +55,8 @@ inline std::string DownloadTypeToString(DownloadType type) {
       return "CudaToolkit";
     case DownloadType::Cortex:
       return "Cortex";
+    case DownloadType::Environments:
+      return
"Environments"; default: return "Unknown"; } @@ -64,6 +73,8 @@ inline DownloadType DownloadTypeFromString(const std::string& str) { return DownloadType::CudaToolkit; } else if (str == "Cortex") { return DownloadType::Cortex; + } else if (str == "Environments") { + return DownloadType::Environments; } else { return DownloadType::Miscellaneous; } diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index ea980e870..b422a7340 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -319,6 +319,7 @@ void PythonEngine::GetModels( void PythonEngine::LoadModel( std::shared_ptr json_body, std::function&& callback) { + // TODO: handle a case that can spawn process but the process spawn fail. pid_t pid; if (!json_body->isMember("model") || !json_body->isMember("model_path")) { Json::Value error; diff --git a/engine/services/environment_serrvice.cc b/engine/services/environment_serrvice.cc new file mode 100644 index 000000000..e69de29bb diff --git a/engine/services/environment_service.h b/engine/services/environment_service.h new file mode 100644 index 000000000..b26cd3cf4 --- /dev/null +++ b/engine/services/environment_service.h @@ -0,0 +1,51 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils/environment_utils.h" +#include "utils/system_info_utils.h" + +using Environment = environment_utils::Environment; + +struct EnvironmentsUpdateResult { + Environment environment; + std::string from; + std::string to; + + Json::Value ToJson() const { + Json::Value root; + root["environment"] = environment.ToJson(); + root["from"] = from; + root["to"] = to; + return root; + } +}; + +class EnvironmentService { + public: + cpp::result IsEnvironmentReady( + const std::string& environment); + cpp::result InstallEnvironmentAsync( + const std::string& environment, const std::string& version); + cpp::result UnInstallEnvironment( + const std::string& environment, const std::string& version); + cpp::result, std::string> GetEnvironmentReleases( + const std::string& environment) const; + cpp::result, std::string> GetInstalledEnvironments() + const; + cpp::result, std::string> GetDefaultEnvironment( + const std::string& environment) const; + cpp::result, std::string> SetDefaultEnvironment( + const std::string& environment) const; + + private: + cpp::result DownloadEnvironment( + const std::string& environment, const std::string& version = "latest"); + cpp::result, std::string> + GetEnvironmentDirPath(const std::string& environment); +}; \ No newline at end of file diff --git a/engine/utils/curl_utils.cc b/engine/utils/curl_utils.cc index 9ba0f5a76..b60f76fc2 100644 --- a/engine/utils/curl_utils.cc +++ b/engine/utils/curl_utils.cc @@ -242,8 +242,8 @@ cpp::result ReadRemoteYaml(const std::string& url) { } } -cpp::result SimpleGetJson(const std::string& url, - const int timeout) { +cpp::result SimpleGetJson( + const std::string& url, const int timeout, std::optional recursive) { auto result = SimpleGet(url, timeout); if (result.has_error()) { CTL_ERR("Failed to get JSON from " + url + ": " + result.error()); @@ -257,11 +257,11 @@ cpp::result SimpleGetJson(const std::string& url, " parsing error: " + reader.getFormattedErrorMessages()); } - if (root.isArray()) { + if (root.isArray() && recursive) { for (const auto& value : root) { if (value["type"].asString() == "directory") { auto temp = - SimpleGetJson(url + "/" + value["path"].asString(), 
timeout); + SimpleGetJson(url + "/" + value["path"].asString(), timeout, recursive); if (!temp.has_error()) { if (temp.value().isArray()) { for (const auto& item : temp.value()) { @@ -273,14 +273,13 @@ cpp::result SimpleGetJson(const std::string& url, } } } - for (Json::ArrayIndex i = 0; i < root.size();) { - if (root[i].isMember("type") && root[i]["type"] == "directory") { - root.removeIndex(i, nullptr); - } else { - ++i; - } + for (Json::ArrayIndex i = 0; i < root.size();) { + if (root[i].isMember("type") && root[i]["type"] == "directory") { + root.removeIndex(i, nullptr); + } else { + ++i; + } } - } return root; } diff --git a/engine/utils/curl_utils.h b/engine/utils/curl_utils.h index 64b5fc339..8bf324dd9 100644 --- a/engine/utils/curl_utils.h +++ b/engine/utils/curl_utils.h @@ -32,8 +32,9 @@ cpp::result ReadRemoteYaml(const std::string& url); * * [timeout] is an optional parameter that specifies the timeout for the request. In second. */ -cpp::result SimpleGetJson(const std::string& url, - const int timeout = -1); +cpp::result SimpleGetJson( + const std::string& url, const int timeout = -1, + std::optional recursive = true); cpp::result SimplePostJson( const std::string& url, const std::string& body = ""); diff --git a/engine/utils/environment_utils.h b/engine/utils/environment_utils.h new file mode 100644 index 000000000..d6666a122 --- /dev/null +++ b/engine/utils/environment_utils.h @@ -0,0 +1,168 @@ +#pragma once +#include +#include +#include +#include +#include +#include "utils/curl_utils.h" +#include "utils/result.hpp" +namespace environment_utils { + +constexpr const auto kBaseEnvironmentsUrl = + "https://delta.jan.ai/environments/"; + +struct Environment { + std::string type; // e.g., "python" + std::string name; // e.g., "whispervq" + std::string version; // e.g., "latest" + std::string os; // e.g., "window", "linux" + std::string arch; // e.g., "amd64" + + // Convert Environment to JSON + Json::Value ToJson() const { + Json::Value json; + json["type"] = type; + json["name"] = name; + json["version"] = version; + json["os"] = os; + json["arch"] = arch; + return json; + } + + // Create Environment from JSON + static cpp::result FromJson( + const Json::Value& json) { + Environment env; + + // Validate required fields + const std::vector required_fields = {"type", "name", "version", + "os", "arch"}; + + for (const auto& field : required_fields) { + if (!json.isMember(field) || json[field].asString().empty()) { + return cpp::fail("Missing or empty required field: " + field); + } + } + + env.type = json["type"].asString(); + env.name = json["name"].asString(); + env.version = json["version"].asString(); + env.os = json["os"].asString(); + env.arch = json["arch"].asString(); + + return env; + } + + // Method to generate full artifact URL + std::string generateUrl() const { + return kBaseEnvironmentsUrl + type + "/" + name + "/" + version + "/" + + name + "-" + os + "-" + arch + ".zip"; + } + + // Method to validate the environment structure + bool isValid() const { + return !type.empty() && !name.empty() && !version.empty() && !os.empty() && + !arch.empty(); + } +}; + +// Utility function to parse URL components into an Environment struct +cpp::result parseEnvironmentUrl( + const std::string& url) { + Environment env; + + size_t environments_pos = url.find("environments/"); + if (environments_pos == std::string::npos) { + return cpp::fail("Invalid URL format"); + } + + std::string remaining = url.substr(environments_pos + 13); + std::vector parts; + size_t pos = 0; + while ((pos = 
remaining.find('/')) != std::string::npos) { + parts.push_back(remaining.substr(0, pos)); + remaining.erase(0, pos + 1); + } + parts.push_back(remaining); + + if (parts.size() < 5) { + return cpp::fail("Insufficient URL components"); + } + + env.type = parts[0]; + env.name = parts[1]; + env.version = parts[2]; + + // Extract OS and arch from the filename + std::string filename = parts[3]; + size_t os_sep = filename.find('-'); + size_t arch_sep = filename.find('-', os_sep + 1); + + if (os_sep == std::string::npos || arch_sep == std::string::npos) { + return cpp::fail("Cannot parse OS and architecture"); + } + + env.os = filename.substr(os_sep + 1, arch_sep - os_sep - 1); + env.arch = filename.substr(arch_sep + 1, filename.find('.') - arch_sep - 1); + + return env; +} + +// Fetch environment names +cpp::result, std::string> fetchEnvironmentNames( + const std::string& type, int timeout = 30) { + auto url = kBaseEnvironmentsUrl + type; + auto json_result = curl_utils::SimpleGetJson(url, timeout, false); + if (json_result.has_error()) { + return cpp::fail(json_result.error()); + } + + std::vector environment_names; + const Json::Value& root = json_result.value(); + + // Store unique environment names + std::unordered_set unique_names; + + for (const auto& item : root) { + if (item.isMember("path")) { + environment_names.push_back(item["path"].asString()); + } + } + + return environment_names; +} + +// Get all versions for a specific environment +cpp::result, std::string> fetchEnvironmentVersions( + const std::string& base_url, const std::string& environment_name, + int timeout = 30, bool recursive = true) { + auto json_result = curl_utils::SimpleGetJson( + base_url + "/" + environment_name, timeout, recursive); + if (json_result.has_error()) { + return cpp::fail(json_result.error()); + } + + std::vector versions; + const Json::Value& root = json_result.value(); + + // Store unique versions + std::unordered_set unique_versions; + + for (const auto& item : root) { + if (item.isMember("path")) { + auto url_parse_result = parseEnvironmentUrl( + base_url + "/" + environment_name + "/" + item["path"].asString()); + if (!url_parse_result.has_error()) { + const auto& env = url_parse_result.value(); + // Only add if not already present + if (unique_versions.insert(env.version).second) { + versions.push_back(env.version); + } + } + } + } + + return versions; +} + +} // namespace environment_utils \ No newline at end of file From 34237d67fdecea8572a4a1439640a681f78541d9 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Tue, 17 Dec 2024 19:20:18 +0700 Subject: [PATCH 15/34] feat: move virtual environment inside model --- engine/config/model_config.h | 35 +++- .../extensions/python-engine/python_engine.cc | 21 +-- engine/services/environment_serrvice.cc | 0 engine/services/environment_service.h | 51 ------ engine/services/model_service.cc | 61 ++++++- engine/utils/environment_constants.h | 3 - engine/utils/environment_utils.h | 168 ------------------ engine/utils/set_permission_utils.h | 106 +++++++++++ 8 files changed, 198 insertions(+), 247 deletions(-) delete mode 100644 engine/services/environment_serrvice.cc delete mode 100644 engine/services/environment_service.h delete mode 100644 engine/utils/environment_constants.h delete mode 100644 engine/utils/environment_utils.h create mode 100644 engine/utils/set_permission_utils.h diff --git a/engine/config/model_config.h b/engine/config/model_config.h index ccf3e2ec0..0743cdab2 100644 --- a/engine/config/model_config.h +++ 
b/engine/config/model_config.h @@ -547,17 +547,17 @@ struct PythonModelConfig { // Model Load Parameters std::string port; - std::string model_location; std::string script; std::string log_path; std::string log_level; std::string environment; std::vector command; // New command field + std::vector files; std::string engine; Json::Value extra_params; // Accept dynamic extra parameters // Method to convert C++ struct to YAML - void ToYaml(const std::string & filepath) const { + void ToYaml(const std::string& filepath) const { YAML::Emitter out; out << YAML::BeginMap; @@ -597,7 +597,6 @@ struct PythonModelConfig { // Model Load Parameters out << YAML::Key << "port" << YAML::Value << port; - out << YAML::Key << "model_location" << YAML::Value << model_location; out << YAML::Key << "script" << YAML::Value << script; out << YAML::Key << "log_path" << YAML::Value << log_path; out << YAML::Key << "log_level" << YAML::Value << log_level; @@ -610,6 +609,13 @@ struct PythonModelConfig { } out << YAML::EndSeq; + // Serialize files as YAML list + out << YAML::Key << "files" << YAML::Value << YAML::BeginSeq; + for (const auto& file : files) { + out << file; + } + out << YAML::EndSeq; + out << YAML::Key << "engine" << YAML::Value << engine; // Serialize extra_params as YAML @@ -674,8 +680,6 @@ struct PythonModelConfig { auto mlp = config; if (mlp["port"]) port = mlp["port"].as(); - if (mlp["model_location"]) - model_location = mlp["model_location"].as(); if (mlp["script"]) script = mlp["script"].as(); if (mlp["log_path"]) @@ -693,6 +697,12 @@ struct PythonModelConfig { } } + if (mlp["files"] && mlp["files"].IsSequence()) { + for (const auto& file : mlp["files"]) { + files.push_back(file.as()); + } + } + if (mlp["extra_params"]) { for (YAML::const_iterator it = mlp["extra_params"].begin(); it != mlp["extra_params"].end(); ++it) { @@ -735,7 +745,6 @@ struct PythonModelConfig { root["log_path"] = log_path; root["log_level"] = log_level; root["environment"] = environment; - root["model_location"] = model_location; root["script"] = script; // Serialize command as JSON array @@ -743,6 +752,10 @@ struct PythonModelConfig { root["command"].append(cmd); } + for (const auto& file : files) { + root["files"].append(file); + } + root["engine"] = engine; root["extra_params"] = extra_params; // Serialize the JSON value directly @@ -751,7 +764,7 @@ struct PythonModelConfig { // Method to populate struct from JSON void FromJson(const Json::Value& root) { - + if (root.isMember("id")) id = root["id"].asString(); if (root.isMember("model")) @@ -802,8 +815,6 @@ struct PythonModelConfig { environment = mlp["environment"].asString(); if (mlp.isMember("engine")) engine = mlp["engine"].asString(); - if (mlp.isMember("model_location")) - model_location = mlp["model_location"].asString(); if (mlp.isMember("script")) script = mlp["script"].asString(); @@ -813,6 +824,12 @@ struct PythonModelConfig { } } + if (mlp.isMember("files")) { + for (const auto& file : mlp["files"]) { + files.push_back(file.asString()); + } + } + if (mlp.isMember("extra_params")) { extra_params = mlp["extra_params"]; // Directly assign the JSON value } diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index b422a7340..5e85ed6b6 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -106,9 +106,9 @@ pid_t PythonEngine::SpawnProcess(const std::string& model, // Convert command vector to char*[] std::vector argv = 
convertToArgv(command); - for (auto c : command) { - std::cout << c << " " << std::endl; - } + // for (auto c : command) { + // std::cout << c << " " << std::endl; + // } // Use posix_spawn for cross-platform compatibility int spawn_result = posix_spawn(&pid, // pid output @@ -319,7 +319,7 @@ void PythonEngine::GetModels( void PythonEngine::LoadModel( std::shared_ptr json_body, std::function&& callback) { - // TODO: handle a case that can spawn process but the process spawn fail. + // TODO: handle a case that can spawn process but the process spawn fail. pid_t pid; if (!json_body->isMember("model") || !json_body->isMember("model_path")) { Json::Value error; @@ -359,26 +359,19 @@ void PythonEngine::LoadModel( return; } auto model_config = models_[model]; + auto model_folder_path = model_config.files[0]; + auto data_folder_path = std::filesystem::path(model_folder_path) / std::filesystem::path("venv"); try { - std::string data_folder_path = - "/home/thuan/cortexcpp/environments/"; // To do: will be removed with cortex data path - std::string model_folder_path = - "/home/thuan/cortexcpp/models/cortex.so/whispervq/fp16/"; // To do: will be removed with cortex model path #ifdef _WIN32 auto executable = std::filesystem::path(data_folder_path) / - std::filesystem::path(model_config.environment) / std::filesystem::path("Scripts"); #else auto executable = std::filesystem::path(data_folder_path) / - std::filesystem::path(model_config.environment) / std::filesystem::path("bin"); #endif - std::cout << "executable string: " << executable.string() - << data_folder_path << " " << model_config.environment - << std::endl; + auto executable_str = (executable / std::filesystem::path(model_config.command[0])).string(); - std::cout << "executable string: " << executable_str << std::endl; auto command = model_config.command; command[0] = executable_str; command.push_back((std::filesystem::path(model_folder_path) / diff --git a/engine/services/environment_serrvice.cc b/engine/services/environment_serrvice.cc deleted file mode 100644 index e69de29bb..000000000 diff --git a/engine/services/environment_service.h b/engine/services/environment_service.h deleted file mode 100644 index b26cd3cf4..000000000 --- a/engine/services/environment_service.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "utils/environment_utils.h" -#include "utils/system_info_utils.h" - -using Environment = environment_utils::Environment; - -struct EnvironmentsUpdateResult { - Environment environment; - std::string from; - std::string to; - - Json::Value ToJson() const { - Json::Value root; - root["environment"] = environment.ToJson(); - root["from"] = from; - root["to"] = to; - return root; - } -}; - -class EnvironmentService { - public: - cpp::result IsEnvironmentReady( - const std::string& environment); - cpp::result InstallEnvironmentAsync( - const std::string& environment, const std::string& version); - cpp::result UnInstallEnvironment( - const std::string& environment, const std::string& version); - cpp::result, std::string> GetEnvironmentReleases( - const std::string& environment) const; - cpp::result, std::string> GetInstalledEnvironments() - const; - cpp::result, std::string> GetDefaultEnvironment( - const std::string& environment) const; - cpp::result, std::string> SetDefaultEnvironment( - const std::string& environment) const; - - private: - cpp::result DownloadEnvironment( - const std::string& environment, const std::string& version = "latest"); 
- cpp::result, std::string> - GetEnvironmentDirPath(const std::string& environment); -}; \ No newline at end of file diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 7faa59ae5..4ebb605b4 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -9,6 +10,7 @@ #include "config/yaml_config.h" #include "database/models.h" #include "hardware_service.h" +#include "utils/archive_utils.h" #include "utils/cli_selection_utils.h" #include "utils/cortex_utils.h" #include "utils/engine_constants.h" @@ -16,6 +18,7 @@ #include "utils/huggingface_utils.h" #include "utils/logging_utils.h" #include "utils/result.hpp" +#include "utils/set_permission_utils.h" #include "utils/string_utils.h" #include "utils/widechar_conv.h" @@ -540,9 +543,63 @@ ModelService::DownloadModelFromCortexsoAsync( if (mc.engine == kPythonEngine) { // process for Python engine config::PythonModelConfig python_model_config; python_model_config.ReadFromYaml(model_yml_item->localPath.string()); - python_model_config.model_location = - model_yml_item->localPath.parent_path().string(); + python_model_config.files.push_back( + model_yml_item->localPath.parent_path().string()); python_model_config.ToYaml(model_yml_item->localPath.string()); + // unzip venv.zip + auto model_folder = model_yml_item->localPath.parent_path(); + auto venv_path = model_folder / std::filesystem::path("venv"); + if (!std::filesystem::exists(venv_path)) { + std::filesystem::create_directories(venv_path); + } + auto venv_zip = model_folder / std::filesystem::path("venv.zip"); + if (std::filesystem::exists(venv_zip)) { + if (archive_utils::ExtractArchive(venv_zip.string(), venv_path)) { + std::filesystem::remove_all(venv_zip); + CTL_INF("Successfully extract venv.zip"); + // If extract success create pyvenv.cfg + std::ofstream pyvenv_cfg(venv_path / + std::filesystem::path("pyvenv.cfg")); +#ifdef _WIN32 + pyvenv_cfg << "home = " + << (venv_path / std::filesystem::path("Scripts")).string() + << std::endl; + pyvenv_cfg << "executable = " + << (venv_path / std::filesystem::path("Scripts") / + std::filesystem::path("python.exe")) + .string() + << std::endl; + +#else + pyvenv_cfg << "home = " + << (venv_path / std::filesystem::path("bin/")).string() + << std::endl; + pyvenv_cfg + << "executable = " + << (venv_path / std::filesystem::path("bin/python")).string() + << std::endl; +#endif + + // Close the file + pyvenv_cfg.close(); + // Add executable permission to python + +#ifdef _WIN32 + set_permission_utils::SetExecutePermissionsRecursive( + venv_path / std::filesystem::path("Scripts")); +#else + set_permission_utils::SetExecutePermissionsRecursive( + venv_path / std::filesystem::path("bin")); +#endif + + } else { + CTL_ERR("Failed to extract venv.zip"); + }; + + } else { + CTL_ERR( + "venv.zip not found in model folder: " << model_folder.string()); + } } else { mc.model = unique_model_id; diff --git a/engine/utils/environment_constants.h b/engine/utils/environment_constants.h deleted file mode 100644 index f14df67f8..000000000 --- a/engine/utils/environment_constants.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -constexpr const auto kWhisperVQEnvironment = "whispervq"; \ No newline at end of file diff --git a/engine/utils/environment_utils.h b/engine/utils/environment_utils.h deleted file mode 100644 index d6666a122..000000000 --- a/engine/utils/environment_utils.h +++ /dev/null @@ -1,168 +0,0 @@ -#pragma once -#include 
-#include -#include -#include -#include -#include "utils/curl_utils.h" -#include "utils/result.hpp" -namespace environment_utils { - -constexpr const auto kBaseEnvironmentsUrl = - "https://delta.jan.ai/environments/"; - -struct Environment { - std::string type; // e.g., "python" - std::string name; // e.g., "whispervq" - std::string version; // e.g., "latest" - std::string os; // e.g., "window", "linux" - std::string arch; // e.g., "amd64" - - // Convert Environment to JSON - Json::Value ToJson() const { - Json::Value json; - json["type"] = type; - json["name"] = name; - json["version"] = version; - json["os"] = os; - json["arch"] = arch; - return json; - } - - // Create Environment from JSON - static cpp::result FromJson( - const Json::Value& json) { - Environment env; - - // Validate required fields - const std::vector required_fields = {"type", "name", "version", - "os", "arch"}; - - for (const auto& field : required_fields) { - if (!json.isMember(field) || json[field].asString().empty()) { - return cpp::fail("Missing or empty required field: " + field); - } - } - - env.type = json["type"].asString(); - env.name = json["name"].asString(); - env.version = json["version"].asString(); - env.os = json["os"].asString(); - env.arch = json["arch"].asString(); - - return env; - } - - // Method to generate full artifact URL - std::string generateUrl() const { - return kBaseEnvironmentsUrl + type + "/" + name + "/" + version + "/" + - name + "-" + os + "-" + arch + ".zip"; - } - - // Method to validate the environment structure - bool isValid() const { - return !type.empty() && !name.empty() && !version.empty() && !os.empty() && - !arch.empty(); - } -}; - -// Utility function to parse URL components into an Environment struct -cpp::result parseEnvironmentUrl( - const std::string& url) { - Environment env; - - size_t environments_pos = url.find("environments/"); - if (environments_pos == std::string::npos) { - return cpp::fail("Invalid URL format"); - } - - std::string remaining = url.substr(environments_pos + 13); - std::vector parts; - size_t pos = 0; - while ((pos = remaining.find('/')) != std::string::npos) { - parts.push_back(remaining.substr(0, pos)); - remaining.erase(0, pos + 1); - } - parts.push_back(remaining); - - if (parts.size() < 5) { - return cpp::fail("Insufficient URL components"); - } - - env.type = parts[0]; - env.name = parts[1]; - env.version = parts[2]; - - // Extract OS and arch from the filename - std::string filename = parts[3]; - size_t os_sep = filename.find('-'); - size_t arch_sep = filename.find('-', os_sep + 1); - - if (os_sep == std::string::npos || arch_sep == std::string::npos) { - return cpp::fail("Cannot parse OS and architecture"); - } - - env.os = filename.substr(os_sep + 1, arch_sep - os_sep - 1); - env.arch = filename.substr(arch_sep + 1, filename.find('.') - arch_sep - 1); - - return env; -} - -// Fetch environment names -cpp::result, std::string> fetchEnvironmentNames( - const std::string& type, int timeout = 30) { - auto url = kBaseEnvironmentsUrl + type; - auto json_result = curl_utils::SimpleGetJson(url, timeout, false); - if (json_result.has_error()) { - return cpp::fail(json_result.error()); - } - - std::vector environment_names; - const Json::Value& root = json_result.value(); - - // Store unique environment names - std::unordered_set unique_names; - - for (const auto& item : root) { - if (item.isMember("path")) { - environment_names.push_back(item["path"].asString()); - } - } - - return environment_names; -} - -// Get all versions for a specific 
environment -cpp::result, std::string> fetchEnvironmentVersions( - const std::string& base_url, const std::string& environment_name, - int timeout = 30, bool recursive = true) { - auto json_result = curl_utils::SimpleGetJson( - base_url + "/" + environment_name, timeout, recursive); - if (json_result.has_error()) { - return cpp::fail(json_result.error()); - } - - std::vector versions; - const Json::Value& root = json_result.value(); - - // Store unique versions - std::unordered_set unique_versions; - - for (const auto& item : root) { - if (item.isMember("path")) { - auto url_parse_result = parseEnvironmentUrl( - base_url + "/" + environment_name + "/" + item["path"].asString()); - if (!url_parse_result.has_error()) { - const auto& env = url_parse_result.value(); - // Only add if not already present - if (unique_versions.insert(env.version).second) { - versions.push_back(env.version); - } - } - } - } - - return versions; -} - -} // namespace environment_utils \ No newline at end of file diff --git a/engine/utils/set_permission_utils.h b/engine/utils/set_permission_utils.h new file mode 100644 index 000000000..b91d0c383 --- /dev/null +++ b/engine/utils/set_permission_utils.h @@ -0,0 +1,106 @@ +#pragma once + +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#endif +#include "utils/logging_utils.h" +namespace set_permission_utils { +// Cross-platform method to set execute permission for a single file +inline bool SetExecutePermission(const std::filesystem::path& filePath, + bool ownerOnly = false) { + std::error_code ec; + +#ifdef _WIN32 + // Windows execution permission handling + std::filesystem::path exePath = filePath; + + // Add .exe extension if no extension exists + if (exePath.extension().empty()) { + exePath += ".exe"; + std::filesystem::rename(filePath, exePath); + } + + // Clear read-only attribute + DWORD fileAttributes = GetFileAttributes(exePath.c_str()); + if (fileAttributes == INVALID_FILE_ATTRIBUTES) { + CTL_ERROR << "Error accessing file: " << GetLastError() << std::endl; + return false; + } + + fileAttributes &= ~FILE_ATTRIBUTE_READONLY; + + if (!SetFileAttributes(exePath.c_str(), fileAttributes)) { + CTL_ERROR << "Error setting file attributes: " << GetLastError() + << std::endl; + return false; + } + +#else + // POSIX systems (Linux, macOS) + struct stat st; + if (stat(filePath.c_str(), &st) != 0) { + CTL_ERR("Error getting file stats: " << strerror(errno)); + return false; + } + + // Set execute permissions based on ownerOnly flag + mode_t newMode; + if (ownerOnly) { + // Only owner can execute + newMode = (st.st_mode & ~(S_IXGRP | S_IXOTH)) | S_IXUSR; + } else { + // Everyone can execute + newMode = st.st_mode | S_IXUSR | // Owner execute + S_IXGRP | // Group execute + S_IXOTH; // Others execute + } + + if (chmod(filePath.c_str(), newMode) != 0) { + CTL_ERR("Error setting execute permissions: " << strerror(errno)); + return false; + } +#endif + + return true; +} +inline std::vector SetExecutePermissionsRecursive( + const std::filesystem::path& directoryPath, bool ownerOnly = false, + bool skipDirectories = true) { + std::vector modifiedFiles; + + try { + // Iterate through all files and subdirectories + for (const auto& entry : + std::filesystem::recursive_directory_iterator(directoryPath)) { + // Skip directories if specified + if (skipDirectories && entry.is_directory()) { + continue; + } + + // Only process files + if (entry.is_regular_file()) { + try { + if (SetExecutePermission(entry.path(), ownerOnly)) { + 
modifiedFiles.push_back(entry.path()); + } + } catch (const std::exception& e) { + CTL_ERR("Error processing file " + entry.path().string() + ": " + + e.what()); + } + } + } + } catch (const std::filesystem::filesystem_error& e) { + CTL_ERR("Filesystem error: " << e.what()); + } + + return modifiedFiles; +} + +} // namespace set_permission_utils \ No newline at end of file From 7ce7eb707e1529a871bac37230541d5735143052 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 <35255081+nguyenhoangthuan99@users.noreply.github.com> Date: Wed, 18 Dec 2024 15:07:32 +0700 Subject: [PATCH 16/34] Update CMakeLists.txt --- engine/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 782e2d849..420434eb9 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -150,9 +150,6 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/openai_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/anthropic_engine.cc - ) target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) From c2b11180b00b0b90447706a125ba58cca2627f6c Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 <35255081+nguyenhoangthuan99@users.noreply.github.com> Date: Wed, 18 Dec 2024 15:10:26 +0700 Subject: [PATCH 17/34] Update CMakeLists.txt --- engine/cli/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt index c29ec622b..efff03d10 100644 --- a/engine/cli/CMakeLists.txt +++ b/engine/cli/CMakeLists.txt @@ -84,9 +84,7 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/../services/inference_service.cc ${CMAKE_CURRENT_SOURCE_DIR}/../services/hardware_service.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc - - ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/openai_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/anthropic_engine.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/python-engine/python_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/template_renderer.cc From 7f9ded074d60745090053c403e5ec97224986ab8 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Wed, 18 Dec 2024 15:19:26 +0700 Subject: [PATCH 18/34] fix: CI build --- engine/extensions/remote-engine/remote_engine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index 54d6b2f83..6f08b5403 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -21,7 +21,7 @@ struct StreamContext { // Cache value for Anthropic std::string id; std::string model; - TemplateRenderer& renderer; + extensions::TemplateRenderer& renderer; std::string stream_template; }; struct CurlResponse { From 27d50974d41605a68d6256d40e3f547a7d2ad9eb Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 19 Dec 2024 15:10:32 +0700 Subject: [PATCH 19/34] fix: move log of python to cortex logs folder --- .../extensions/python-engine/python_engine.cc | 18 ++++++++++++------ .../extensions/python-engine/python_engine.h | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index 5e85ed6b6..12c25a76c 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ 
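To make the intended use of the `set_permission_utils.h` helpers above concrete, here is a minimal caller sketch; the venv path is hypothetical and error reporting is reduced to a count:

```cpp
#include <filesystem>
#include <iostream>

#include "utils/set_permission_utils.h"

int main() {
  // Hypothetical layout produced by extracting venv.zip (POSIX)
  std::filesystem::path bin_dir = "models/whispervq/venv/bin";

  // Mark every regular file under bin/ as executable for owner, group,
  // and others; directories are skipped by default.
  auto changed = set_permission_utils::SetExecutePermissionsRecursive(bin_dir);
  std::cout << "made " << changed.size() << " files executable\n";
  return 0;
}
```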
b/engine/extensions/python-engine/python_engine.cc
@@ -360,14 +360,15 @@ void PythonEngine::LoadModel(
   }
   auto model_config = models_[model];
   auto model_folder_path = model_config.files[0];
-  auto data_folder_path = std::filesystem::path(model_folder_path) / std::filesystem::path("venv");
+  auto data_folder_path =
+      std::filesystem::path(model_folder_path) / std::filesystem::path("venv");
   try {
 #ifdef _WIN32
     auto executable = std::filesystem::path(data_folder_path) /
                       std::filesystem::path("Scripts");
 #else
-    auto executable = std::filesystem::path(data_folder_path) /
-                      std::filesystem::path("bin");
+    auto executable =
+        std::filesystem::path(data_folder_path) / std::filesystem::path("bin");
 #endif

     auto executable_str =
@@ -377,9 +378,14 @@ void PythonEngine::LoadModel(
     command.push_back((std::filesystem::path(model_folder_path) /
                        std::filesystem::path(model_config.script))
                           .string());
-    std::list<std::string> args{"--port", model_config.port,
-                                "--log_path", model_config.log_path,
-                                "--log_level", model_config.log_level};
+    std::list<std::string> args{"--port",
+                                model_config.port,
+                                "--log_path",
+                                (file_manager_utils::GetCortexLogPath() /
+                                 std::filesystem::path(model_config.log_path))
+                                    .string(),
+                                "--log_level",
+                                model_config.log_level};
     if (!model_config.extra_params.isNull() &&
         model_config.extra_params.isObject()) {
       for (const auto& key : model_config.extra_params.getMemberNames()) {
diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h
index e404f2e19..f862d0ed0 100644
--- a/engine/extensions/python-engine/python_engine.h
+++ b/engine/extensions/python-engine/python_engine.h
@@ -11,6 +11,7 @@
 #include "cortex-common/EngineI.h"
 #include "extensions/template_renderer.h"
 #include "utils/file_logger.h"
+#include "utils/file_manager_utils.h"
 #ifdef _WIN32
 #include
 #include
From f95cfef756a4e1d5680a471afcaa0c9616dba830 Mon Sep 17 00:00:00 2001
From: nguyenhoangthuan99
Date: Thu, 19 Dec 2024 15:14:43 +0700
Subject: [PATCH 20/34] fix: unit test for remote engine after template
 renderer location change

---
 engine/test/components/CMakeLists.txt        | 2 +-
 engine/test/components/test_remote_engine.cc | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/engine/test/components/CMakeLists.txt b/engine/test/components/CMakeLists.txt
index 0df46cfc2..6ca836158 100644
--- a/engine/test/components/CMakeLists.txt
+++ b/engine/test/components/CMakeLists.txt
@@ -16,7 +16,7 @@ add_executable(${PROJECT_NAME}
     ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/file_manager_utils.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/curl_utils.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/system_info_utils.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../extensions/remote-engine/template_renderer.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../extensions/template_renderer.cc
 )

 find_package(Drogon CONFIG REQUIRED)
diff --git a/engine/test/components/test_remote_engine.cc b/engine/test/components/test_remote_engine.cc
index bfac76f49..5f1b85044 100644
--- a/engine/test/components/test_remote_engine.cc
+++ b/engine/test/components/test_remote_engine.cc
@@ -1,4 +1,4 @@
-#include "extensions/remote-engine/template_renderer.h"
+#include "extensions/template_renderer.h"
 #include "gtest/gtest.h"
 #include "utils/json_helper.h"

@@ -42,7 +42,7 @@ TEST_F(RemoteEngineTest, OpenAiToAnthropicRequest) {

   auto data = json_helper::ParseJsonString(message_with_system);

-  remote_engine::TemplateRenderer rdr;
+  extensions::TemplateRenderer rdr;
   auto res = rdr.Render(tpl, data);
   auto res_json =
json_helper::ParseJsonString(res); @@ -69,7 +69,7 @@ TEST_F(RemoteEngineTest, OpenAiToAnthropicRequest) { auto data = json_helper::ParseJsonString(message_without_system); - remote_engine::TemplateRenderer rdr; + extensions::TemplateRenderer rdr; auto res = rdr.Render(tpl, data); auto res_json = json_helper::ParseJsonString(res); From 2ea032bc1b84e6e67b8ae8c68357f9ae91543713 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 19 Dec 2024 15:51:32 +0700 Subject: [PATCH 21/34] fix: CI build windows --- engine/services/model_service.cc | 2 +- engine/utils/set_permission_utils.h | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index bb80a6dfa..e3d63fd82 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -554,7 +554,7 @@ ModelService::DownloadModelFromCortexsoAsync( } auto venv_zip = model_folder / std::filesystem::path("venv.zip"); if (std::filesystem::exists(venv_zip)) { - if (archive_utils::ExtractArchive(venv_zip.string(), venv_path)) { + if (archive_utils::ExtractArchive(venv_zip.string(), venv_path.string())) { std::filesystem::remove_all(venv_zip); CTL_INF("Successfully extract venv.zip"); // If extract success create pyvenv.cfg diff --git a/engine/utils/set_permission_utils.h b/engine/utils/set_permission_utils.h index b91d0c383..6e3ede3d3 100644 --- a/engine/utils/set_permission_utils.h +++ b/engine/utils/set_permission_utils.h @@ -30,15 +30,14 @@ inline bool SetExecutePermission(const std::filesystem::path& filePath, // Clear read-only attribute DWORD fileAttributes = GetFileAttributes(exePath.c_str()); if (fileAttributes == INVALID_FILE_ATTRIBUTES) { - CTL_ERROR << "Error accessing file: " << GetLastError() << std::endl; + CTL_ERROR("Error accessing file: " << GetLastError()); return false; } fileAttributes &= ~FILE_ATTRIBUTE_READONLY; if (!SetFileAttributes(exePath.c_str(), fileAttributes)) { - CTL_ERROR << "Error setting file attributes: " << GetLastError() - << std::endl; + CTL_ERROR("Error setting file attributes: " << GetLastError()); return false; } From 595998022c1e8dd6976102d7225d585cb7ed8b35 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 19 Dec 2024 15:57:02 +0700 Subject: [PATCH 22/34] fix: CI build windows --- engine/utils/set_permission_utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engine/utils/set_permission_utils.h b/engine/utils/set_permission_utils.h index 6e3ede3d3..de0a14155 100644 --- a/engine/utils/set_permission_utils.h +++ b/engine/utils/set_permission_utils.h @@ -30,14 +30,14 @@ inline bool SetExecutePermission(const std::filesystem::path& filePath, // Clear read-only attribute DWORD fileAttributes = GetFileAttributes(exePath.c_str()); if (fileAttributes == INVALID_FILE_ATTRIBUTES) { - CTL_ERROR("Error accessing file: " << GetLastError()); + CTL_ERR("Error accessing file: " << GetLastError()); return false; } fileAttributes &= ~FILE_ATTRIBUTE_READONLY; if (!SetFileAttributes(exePath.c_str(), fileAttributes)) { - CTL_ERROR("Error setting file attributes: " << GetLastError()); + CTL_ERR("Error setting file attributes: " << GetLastError()); return false; } From 09b56ad0b792fb5314a5efb99b2664468cdefa6c Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Mon, 23 Dec 2024 14:53:00 +0700 Subject: [PATCH 23/34] feat: add depends model.yml for python engine --- engine/config/model_config.h | 24 ++++++++++++++ engine/services/model_service.cc | 55 +++++++++++++++++++++++++++++++- 2 files 
changed, 78 insertions(+), 1 deletion(-)

diff --git a/engine/config/model_config.h b/engine/config/model_config.h
index 18150e76a..d8ede92f7 100644
--- a/engine/config/model_config.h
+++ b/engine/config/model_config.h
@@ -514,6 +514,7 @@ struct PythonModelConfig {
   std::string environment;
   std::vector<std::string> command;  // New command field
   std::vector<std::string> files;
+  std::vector<std::string> depends;
   std::string engine;
   Json::Value extra_params;  // Accept dynamic extra parameters

@@ -577,6 +578,13 @@ struct PythonModelConfig {
     }
     out << YAML::EndSeq;

+    // Serialize depends as YAML list
+    out << YAML::Key << "depends" << YAML::Value << YAML::BeginSeq;
+    for (const auto& depend : depends) {
+      out << depend;
+    }
+    out << YAML::EndSeq;
+
     out << YAML::Key << "engine" << YAML::Value << engine;

     // Serialize extra_params as YAML
@@ -664,6 +672,12 @@ struct PythonModelConfig {
       }
     }

+    if (mlp["depends"] && mlp["depends"].IsSequence()) {
+      for (const auto& depend : mlp["depends"]) {
+        depends.push_back(depend.as<std::string>());
+      }
+    }
+
     if (mlp["extra_params"]) {
       for (YAML::const_iterator it = mlp["extra_params"].begin();
            it != mlp["extra_params"].end(); ++it) {
@@ -717,6 +731,10 @@ struct PythonModelConfig {
       root["files"].append(file);
     }

+    for (const auto& depend : depends) {
+      root["depends"].append(depend);
+    }
+
     root["engine"] = engine;
     root["extra_params"] = extra_params;  // Serialize the JSON value directly

@@ -791,6 +809,12 @@ struct PythonModelConfig {
       }
     }

+    if (mlp.isMember("depends")) {
+      for (const auto& depend : mlp["depends"]) {
+        depends.push_back(depend.asString());
+      }
+    }
+
     if (mlp.isMember("extra_params")) {
       extra_params = mlp["extra_params"];  // Directly assign the JSON value
     }
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index e3d63fd82..a6e3b4277 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -554,7 +554,8 @@ ModelService::DownloadModelFromCortexsoAsync(
     }
     auto venv_zip = model_folder / std::filesystem::path("venv.zip");
     if (std::filesystem::exists(venv_zip)) {
-      if (archive_utils::ExtractArchive(venv_zip.string(), venv_path)) {
+      if (archive_utils::ExtractArchive(venv_zip.string(),
+                                        venv_path.string())) {
         std::filesystem::remove_all(venv_zip);
         CTL_INF("Successfully extract venv.zip");
         // If extract success create pyvenv.cfg
@@ -839,6 +840,29 @@ cpp::result ModelService::StartModel(

   // Check if Python model first
   if (mc.engine == kPythonEngine) {
+
+    config::PythonModelConfig python_model_config;
+    python_model_config.ReadFromYaml(
+        fmu::ToAbsoluteCortexDataPath(
+            fs::path(model_entry.value().path_to_model_yaml))
+            .string());
+    // Start all dependent models
+    auto depends = python_model_config.depends;
+    for (auto& depend : depends) {
+      StartParameterOverride temp;
+      auto res = StartModel(depend, temp);
+      if (res.has_error()) {
+        CTL_WRN("Error: " + res.error());
+        for (auto& depend : depends) {
+          if (depend != model_handle) {
StopModel(depend); + } + } + } + } CTL_ERR("Model failed to start with status code: " << status); return cpp::fail("Model failed to start: " + data["message"].asString()); @@ -1020,6 +1056,23 @@ cpp::result ModelService::StopModel( if (bypass_check) { engine_name = kLlamaEngine; } + + // Update for python engine + if (engine_name == kPythonEngine) { + auto model_entry = modellist_handler.GetModelInfo(model_handle); + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.value().path_to_model_yaml)) + .string()); + // Stop all depends model + auto depends = python_model_config.depends; + for (auto& depend : depends) { + StopModel(depend); + } + } + + // assert(inference_svc_); auto ir = inference_svc_->UnloadModel(engine_name, model_handle); auto status = std::get<0>(ir)["status_code"].asInt(); From 4fb36881f245bf9425bcbf26d0d073776cd3f6ac Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Mon, 23 Dec 2024 17:33:15 +0700 Subject: [PATCH 24/34] fix: CI build --- engine/utils/jinja_utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engine/utils/jinja_utils.h b/engine/utils/jinja_utils.h index f614f4745..12244599f 100644 --- a/engine/utils/jinja_utils.h +++ b/engine/utils/jinja_utils.h @@ -3,7 +3,7 @@ #include #include -#include "extensions/remote-engine/template_renderer.h" +#include "extensions/template_renderer.h" #include "utils/chat-template.hpp" #include "utils/result.hpp" @@ -14,7 +14,7 @@ inline cpp::result RenderTemplate( bool add_generation_prompt = true) { try { auto converted_json = - remote_engine::TemplateRenderer().ConvertJsonValue(data); + extensions::TemplateRenderer().ConvertJsonValue(data); minja::chat_template chat_tmpl(tmpl, add_bos_token ? bos_token : "", add_eos_token ? 
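For context on the `depends` field that drives the start/stop logic above, a python-engine `model.yml` using it might look like this (all ids and paths are illustrative):

```yaml
# Hypothetical model.yml for a python-engine model
model: ichigo:fp16
engine: python-engine
port: "3348"
script: src/app.py
files:
  - /path/to/models/cortexso/ichigo/fp16
depends:
  # Dependencies are started before this model and stopped along with it
  - whispervq:fp16
```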
eos_token : ""); From d5257194964891297d37b3054f0b97f186b72269 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 26 Dec 2024 23:09:40 +0700 Subject: [PATCH 25/34] stream response --- engine/extensions/python-engine/python_engine.cc | 3 ++- engine/extensions/python-engine/python_engine.h | 7 ++++--- engine/services/model_service.cc | 1 - 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index 9bb5c5b54..f96293b92 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -600,7 +600,7 @@ void PythonEngine::HandleInference( // Render with error handling try { - transformed_request = renderer_.Render(transform_request, *json_body); + transformed_request = renderer_.Render(transform_request, body); } catch (const std::exception& e) { throw std::runtime_error("Template rendering error: " + std::string(e.what())); @@ -622,6 +622,7 @@ void PythonEngine::HandleInference( if (body.isMember("stream") && body["stream"].asBool()) { response = MakeStreamPostRequest(model, path, transformed_request, callback); + return; } else { response = MakePostRequest(model, path, transformed_request); } diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index cf4cf1958..e45220a49 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -36,20 +36,21 @@ static size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, std::string chunk(ptr, size * nmemb); context->buffer += chunk; - + LOG_INFO<< "start writing"; // Process complete lines size_t pos; while ((pos = context->buffer.find('\n')) != std::string::npos) { std::string line = context->buffer.substr(0, pos); context->buffer = context->buffer.substr(pos + 1); - + LOG_INFO << "line: "< ModelService::StartModel( } else { // only report to user the error for (auto& depend : depends) { - Json::Value temp; StopModel(depend); } From 2bcedf699186f3e020b9ca64e8bf3e9f9c282512 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Thu, 26 Dec 2024 23:32:45 +0700 Subject: [PATCH 26/34] update set permission api --- engine/utils/set_permission_utils.h | 127 +++++++++++----------------- 1 file changed, 49 insertions(+), 78 deletions(-) diff --git a/engine/utils/set_permission_utils.h b/engine/utils/set_permission_utils.h index de0a14155..c1c08ce8f 100644 --- a/engine/utils/set_permission_utils.h +++ b/engine/utils/set_permission_utils.h @@ -13,93 +13,64 @@ #include "utils/logging_utils.h" namespace set_permission_utils { // Cross-platform method to set execute permission for a single file -inline bool SetExecutePermission(const std::filesystem::path& filePath, - bool ownerOnly = false) { - std::error_code ec; +[[nodiscard]] inline bool SetExecutePermission(const std::filesystem::path& filePath, + bool ownerOnly = false) noexcept { + try { + std::filesystem::perms current_perms = std::filesystem::status(filePath).permissions(); + std::filesystem::perms new_perms; -#ifdef _WIN32 - // Windows execution permission handling - std::filesystem::path exePath = filePath; - - // Add .exe extension if no extension exists - if (exePath.extension().empty()) { - exePath += ".exe"; - std::filesystem::rename(filePath, exePath); - } - - // Clear read-only attribute - DWORD fileAttributes = GetFileAttributes(exePath.c_str()); - if (fileAttributes == INVALID_FILE_ATTRIBUTES) { - 
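The `StreamWriteCallback` above splits the buffered curl payload on newlines, which assumes the Python server streams OpenAI-style server-sent events, one `data:` line per chunk; the `[DONE]` terminator shown here is an assumption based on the OpenAI-compatible endpoints this engine proxies. A sketch of the expected wire format (payloads illustrative):

```
data: {"choices":[{"delta":{"content":"Hel"}}]}

data: {"choices":[{"delta":{"content":"lo"}}]}

data: [DONE]
```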
CTL_ERR("Error accessing file: " << GetLastError()); - return false; - } - - fileAttributes &= ~FILE_ATTRIBUTE_READONLY; - - if (!SetFileAttributes(exePath.c_str(), fileAttributes)) { - CTL_ERR("Error setting file attributes: " << GetLastError()); - return false; - } - -#else - // POSIX systems (Linux, macOS) - struct stat st; - if (stat(filePath.c_str(), &st) != 0) { - CTL_ERR("Error getting file stats: " << strerror(errno)); - return false; - } - - // Set execute permissions based on ownerOnly flag - mode_t newMode; - if (ownerOnly) { - // Only owner can execute - newMode = (st.st_mode & ~(S_IXGRP | S_IXOTH)) | S_IXUSR; - } else { - // Everyone can execute - newMode = st.st_mode | S_IXUSR | // Owner execute - S_IXGRP | // Group execute - S_IXOTH; // Others execute - } - - if (chmod(filePath.c_str(), newMode) != 0) { - CTL_ERR("Error setting execute permissions: " << strerror(errno)); - return false; - } -#endif + if (ownerOnly) { + new_perms = current_perms | std::filesystem::perms::owner_exec; + // Remove group and others execute permissions + new_perms &= ~(std::filesystem::perms::group_exec | std::filesystem::perms::others_exec); + } else { + new_perms = current_perms | std::filesystem::perms::owner_exec | + std::filesystem::perms::group_exec | + std::filesystem::perms::others_exec; + } - return true; + std::filesystem::permissions(filePath, new_perms, + std::filesystem::perm_options::replace); + return true; + } catch (const std::filesystem::filesystem_error& e) { + CTL_ERR("Permission error for file " << filePath.string() + << ": " << e.what()); + return false; + } catch (const std::exception& e) { + CTL_ERR("Unexpected error for file " << filePath.string() + << ": " << e.what()); + return false; + } } -inline std::vector SetExecutePermissionsRecursive( - const std::filesystem::path& directoryPath, bool ownerOnly = false, + +[[nodiscard]] inline std::vector SetExecutePermissionsRecursive( + const std::filesystem::path& directoryPath, + bool ownerOnly = false, bool skipDirectories = true) { - std::vector modifiedFiles; + std::vector modifiedFiles; + modifiedFiles.reserve(100); // Reserve space to prevent frequent reallocations - try { - // Iterate through all files and subdirectories - for (const auto& entry : - std::filesystem::recursive_directory_iterator(directoryPath)) { - // Skip directories if specified - if (skipDirectories && entry.is_directory()) { - continue; - } + try { + const auto options = std::filesystem::directory_options::skip_permission_denied | + std::filesystem::directory_options::follow_directory_symlink; + + for (const auto& entry : + std::filesystem::recursive_directory_iterator(directoryPath, options)) { + if (skipDirectories && entry.is_directory()) { + continue; + } - // Only process files - if (entry.is_regular_file()) { - try { - if (SetExecutePermission(entry.path(), ownerOnly)) { - modifiedFiles.push_back(entry.path()); - } - } catch (const std::exception& e) { - CTL_ERR("Error processing file " + entry.path().string() + ": " + - e.what()); + if (entry.is_regular_file()) { + if (SetExecutePermission(entry.path(), ownerOnly)) { + modifiedFiles.push_back(entry.path()); + } + } } - } + } catch (const std::filesystem::filesystem_error& e) { + CTL_ERR("Filesystem error: " << e.what()); } - } catch (const std::filesystem::filesystem_error& e) { - CTL_ERR("Filesystem error: " << e.what()); - } - return modifiedFiles; + return modifiedFiles; } } // namespace set_permission_utils \ No newline at end of file From 4b9e6dc40ce28bbdaabecf2e87dbe77de0619a01 Mon Sep 17 
00:00:00 2001 From: nguyenhoangthuan99 Date: Fri, 27 Dec 2024 09:32:28 +0700 Subject: [PATCH 27/34] Fix: comment --- engine/utils/curl_utils.cc | 2 +- engine/utils/curl_utils.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/engine/utils/curl_utils.cc b/engine/utils/curl_utils.cc index b60f76fc2..58a00b71a 100644 --- a/engine/utils/curl_utils.cc +++ b/engine/utils/curl_utils.cc @@ -243,7 +243,7 @@ cpp::result ReadRemoteYaml(const std::string& url) { } cpp::result SimpleGetJson( - const std::string& url, const int timeout, std::optional recursive) { + const std::string& url, const int timeout, bool recursive) { auto result = SimpleGet(url, timeout); if (result.has_error()) { CTL_ERR("Failed to get JSON from " + url + ": " + result.error()); diff --git a/engine/utils/curl_utils.h b/engine/utils/curl_utils.h index 8bf324dd9..84b8dbddb 100644 --- a/engine/utils/curl_utils.h +++ b/engine/utils/curl_utils.h @@ -32,9 +32,9 @@ cpp::result ReadRemoteYaml(const std::string& url); * * [timeout] is an optional parameter that specifies the timeout for the request. In second. */ -cpp::result SimpleGetJson( - const std::string& url, const int timeout = -1, - std::optional recursive = true); +cpp::result SimpleGetJson(const std::string& url, + const int timeout = -1, + bool recursive = true); cpp::result SimplePostJson( const std::string& url, const std::string& body = ""); From b84f04c14314a9a0f816f878e7ee75407e56705d Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Fri, 27 Dec 2024 11:32:52 +0700 Subject: [PATCH 28/34] Feat: stream response --- engine/controllers/server.cc | 63 ++++++++++++++----- .../extensions/python-engine/python_engine.h | 1 - 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index 1c455e262..c3d2038ae 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -129,6 +129,7 @@ void server::FineTuning( void server::Inference(const HttpRequestPtr& req, std::function&& callback) { + auto json_body = req->getJsonObject(); LOG_TRACE << "Start inference"; auto q = std::make_shared(); auto ir = inference_svc_->HandleInference(q, req->getJsonObject()); @@ -141,20 +142,36 @@ void server::Inference(const HttpRequestPtr& req, callback(resp); return; } + bool is_stream = + (*json_body).get("stream", false).asBool() || + (*json_body).get("body", Json::Value()).get("stream", false).asBool(); + LOG_TRACE << "Wait to inference"; - auto [status, res] = q->wait_and_pop(); - LOG_DEBUG << "response: " << res.toStyledString(); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); - resp->setStatusCode( - static_cast(status["status_code"].asInt())); - callback(resp); - LOG_TRACE << "Done inference"; + if (is_stream) { + auto model_id = (*json_body).get("model", "invalid_model").asString(); + auto engine_type = [this, &json_body]() -> std::string { + if (!inference_svc_->HasFieldInReq(json_body, "engine")) { + return kLlamaRepo; + } else { + return (*(json_body)).get("engine", kLlamaRepo).asString(); + } + }(); + ProcessStreamRes(callback, q, engine_type, model_id); + } else { + auto [status, res] = q->wait_and_pop(); + LOG_DEBUG << "response: " << res.toStyledString(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode( + static_cast(status["status_code"].asInt())); + callback(resp); + LOG_TRACE << "Done inference"; + } } void server::RouteRequest( const HttpRequestPtr& req, std::function&& callback) { - + auto json_body = 
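The `is_stream` check above accepts the flag at either nesting level, so both of these request bodies (abridged; model and engine values illustrative) select the streaming path:

```json
{ "engine": "python-engine", "model": "whispervq", "stream": true }
```

```json
{ "engine": "python-engine", "model": "whispervq", "body": { "stream": true } }
```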
req->getJsonObject(); LOG_TRACE << "Start route request"; auto q = std::make_shared(); auto ir = inference_svc_->HandleRouteRequest(q, req->getJsonObject()); @@ -167,14 +184,30 @@ void server::RouteRequest( callback(resp); return; } + bool is_stream = + (*json_body).get("stream", false).asBool() || + (*json_body).get("body", Json::Value()).get("stream", false).asBool(); LOG_TRACE << "Wait to route request"; - auto [status, res] = q->wait_and_pop(); - LOG_DEBUG << "response: " << res.toStyledString(); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); - resp->setStatusCode( - static_cast(status["status_code"].asInt())); - callback(resp); - LOG_TRACE << "Done route request"; + if (is_stream) { + + auto model_id = (*json_body).get("model", "invalid_model").asString(); + auto engine_type = [this, &json_body]() -> std::string { + if (!inference_svc_->HasFieldInReq(json_body, "engine")) { + return kLlamaRepo; + } else { + return (*(json_body)).get("engine", kLlamaRepo).asString(); + } + }(); + ProcessStreamRes(callback, q, engine_type, model_id); + } else { + auto [status, res] = q->wait_and_pop(); + LOG_DEBUG << "response: " << res.toStyledString(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode( + static_cast(status["status_code"].asInt())); + callback(resp); + LOG_TRACE << "Done route request"; + } } void server::LoadModel(const HttpRequestPtr& req, diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index e45220a49..9eb7be913 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -36,7 +36,6 @@ static size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, std::string chunk(ptr, size * nmemb); context->buffer += chunk; - LOG_INFO<< "start writing"; // Process complete lines size_t pos; while ((pos = context->buffer.find('\n')) != std::string::npos) { From 839cce42d3636640de21215df2c24196f745df9c Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Sat, 28 Dec 2024 22:31:47 +0700 Subject: [PATCH 29/34] fix: run concurrent request with stream mode --- engine/extensions/python-engine/python_engine.cc | 11 +++++++---- engine/extensions/python-engine/python_engine.h | 2 ++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index f96293b92..db5c8e892 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -16,7 +16,7 @@ static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, return size * nmemb; } -PythonEngine::PythonEngine() { +PythonEngine::PythonEngine():q_(4 /*n_parallel*/, "python_engine") { curl_global_init(CURL_GLOBAL_ALL); } @@ -620,9 +620,12 @@ void PythonEngine::HandleInference( CurlResponse response; if (method == "post") { if (body.isMember("stream") && body["stream"].asBool()) { - response = - MakeStreamPostRequest(model, path, transformed_request, callback); - return; + q_.runTaskInQueue( + [this, model, path, transformed_request, cb = std::move(callback)] { + MakeStreamPostRequest(model, path, transformed_request, cb); + }); + + return; } else { response = MakePostRequest(model, path, transformed_request); } diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index 9eb7be913..1c4286eac 100644 --- a/engine/extensions/python-engine/python_engine.h +++ 
b/engine/extensions/python-engine/python_engine.h @@ -8,6 +8,7 @@ #include #include #include "config/model_config.h" +#include "trantor/utils/ConcurrentTaskQueue.h" #include "cortex-common/EngineI.h" #include "extensions/template_renderer.h" #include "utils/file_logger.h" @@ -91,6 +92,7 @@ class PythonEngine : public EngineI { extensions::TemplateRenderer renderer_; std::unique_ptr async_file_logger_; std::unordered_map processMap; + trantor::ConcurrentTaskQueue q_; // Helper functions CurlResponse MakePostRequest(const std::string& model, From 035f2d5f5eed18f606820435198b47a46b114830 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Mon, 30 Dec 2024 12:19:11 +0700 Subject: [PATCH 30/34] Fix: remove unnecessary interface --- engine/common/base.h | 18 +++--------------- engine/controllers/server.h | 13 +++++-------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/engine/common/base.h b/engine/common/base.h index c572a1823..b5de09059 100644 --- a/engine/common/base.h +++ b/engine/common/base.h @@ -5,7 +5,7 @@ using namespace drogon; class BaseModel { public: - virtual ~BaseModel() {} + virtual ~BaseModel() = default; // Model management virtual void LoadModel( @@ -27,7 +27,7 @@ class BaseModel { class BaseChatCompletion { public: - virtual ~BaseChatCompletion() {} + virtual ~BaseChatCompletion() = default; // General chat method virtual void ChatCompletion( @@ -37,7 +37,7 @@ class BaseChatCompletion { class BaseEmbedding { public: - virtual ~BaseEmbedding() {} + virtual ~BaseEmbedding() = default; // Implement embedding functionality specific to chat virtual void Embedding( @@ -47,15 +47,3 @@ class BaseEmbedding { // The derived class can also override other methods if needed }; -class BasePythonModel { - public: - virtual ~BasePythonModel() {} - - // Model management - virtual void Inference( - const HttpRequestPtr& req, - std::function&& callback) = 0; - virtual void RouteRequest( - const HttpRequestPtr& req, - std::function&& callback) = 0; -}; \ No newline at end of file diff --git a/engine/controllers/server.h b/engine/controllers/server.h index b6b125f97..5f2a14677 100644 --- a/engine/controllers/server.h +++ b/engine/controllers/server.h @@ -25,8 +25,7 @@ namespace inferences { class server : public drogon::HttpController, public BaseModel, public BaseChatCompletion, - public BaseEmbedding, - public BasePythonModel { + public BaseEmbedding { public: server(std::shared_ptr inference_service, std::shared_ptr engine_service); @@ -73,12 +72,10 @@ class server : public drogon::HttpController, void FineTuning( const HttpRequestPtr& req, std::function&& callback) override; - void Inference( - const HttpRequestPtr& req, - std::function&& callback) override; - void RouteRequest( - const HttpRequestPtr& req, - std::function&& callback) override; + void Inference(const HttpRequestPtr& req, + std::function&& callback); + void RouteRequest(const HttpRequestPtr& req, + std::function&& callback); private: void ProcessStreamRes(std::function cb, From 75625bbfb89ba482556a3e92fcce6c68bcb00bf4 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Mon, 30 Dec 2024 12:34:18 +0700 Subject: [PATCH 31/34] Fix comment --- engine/controllers/server.cc | 4 ++-- engine/services/model_service.cc | 15 +++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index cc5cee54a..961798d2c 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -130,7 +130,7 @@ void server::FineTuning( void 
server::Inference(const HttpRequestPtr& req, std::function&& callback) { LOG_TRACE << "Start inference"; - auto q = std::make_shared(); + auto q = std::make_shared(); auto ir = inference_svc_->HandleInference(q, req->getJsonObject()); LOG_DEBUG << "request: " << req->getJsonObject()->toStyledString(); if (ir.has_error()) { @@ -156,7 +156,7 @@ void server::RouteRequest( std::function&& callback) { LOG_TRACE << "Start route request"; - auto q = std::make_shared(); + auto q = std::make_shared(); auto ir = inference_svc_->HandleRouteRequest(q, req->getJsonObject()); LOG_DEBUG << "request: " << req->getJsonObject()->toStyledString(); if (ir.has_error()) { diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 34ca60b3b..d714c6b23 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -84,8 +84,7 @@ void ParseGguf(DatabaseService& db_service, CTL_ERR("Error adding model to modellist: " + result.error()); } } else { - if (auto m = db_service.GetModelInfo(ggufDownloadItem.id); - m.has_value()) { + if (auto m = db_service.GetModelInfo(ggufDownloadItem.id); m.has_value()) { auto upd_m = m.value(); upd_m.status = cortex::db::ModelStatus::Downloaded; if (auto r = db_service.UpdateModelEntry(ggufDownloadItem.id, upd_m); @@ -472,7 +471,8 @@ cpp::result ModelService::HandleUrl( model_size = model_size + item.bytes.value_or(0); } auto gguf_download_item = finishedTask.items[0]; - ParseGguf(*db_service_, gguf_download_item, author, std::nullopt, model_size); + ParseGguf(*db_service_, gguf_download_item, author, std::nullopt, + model_size); }; auto result = download_service_->AddDownloadTask(downloadTask, on_finished); @@ -653,7 +653,8 @@ cpp::result ModelService::DownloadModelFromCortexso( } std::string model_id{name + ":" + branch}; - auto on_finished = [this, branch, model_id](const DownloadTask& finishedTask) { + auto on_finished = [this, branch, + model_id](const DownloadTask& finishedTask) { const DownloadItem* model_yml_item = nullptr; auto need_parse_gguf = true; @@ -824,8 +825,7 @@ cpp::result ModelService::StartModel( constexpr const int kDefautlContextLength = 8192; int max_model_context_length = kDefautlContextLength; Json::Value json_data; - - auto model_entry = modellist_handler.GetModelInfo(model_handle); + auto model_entry = db_service_->GetModelInfo(model_handle); if (model_entry.has_error()) { CTL_WRN("Error: " + model_entry.error()); return cpp::fail(model_entry.error()); @@ -842,7 +842,6 @@ cpp::result ModelService::StartModel( config::PythonModelConfig python_model_config; python_model_config.ReadFromYaml( - fmu::ToAbsoluteCortexDataPath( fs::path(model_entry.value().path_to_model_yaml)) .string()); @@ -1051,7 +1050,7 @@ cpp::result ModelService::StopModel( // Update for python engine if (engine_name == kPythonEngine) { - auto model_entry = modellist_handler.GetModelInfo(model_handle); + auto model_entry = db_service_->GetModelInfo(model_handle); config::PythonModelConfig python_model_config; python_model_config.ReadFromYaml( fmu::ToAbsoluteCortexDataPath( From dabc154486fa0167ec4159b4fba54bdd8713795b Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Mon, 30 Dec 2024 17:52:43 +0700 Subject: [PATCH 32/34] Fix: comment review --- engine/controllers/models.cc | 19 ++-- .../extensions/python-engine/python_engine.cc | 91 ++++--------------- .../extensions/python-engine/python_engine.h | 2 + engine/services/download_service.cc | 3 - engine/services/model_service.cc | 5 +- engine/utils/curl_utils.cc | 21 ++++- 
engine/utils/curl_utils.h | 5 +- 7 files changed, 50 insertions(+), 96 deletions(-) diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index d6b985ffb..34c6504ac 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -290,28 +290,27 @@ void Models::GetModel(const HttpRequestPtr& req, auto resp = cortex_utils::CreateCortexHttpTextAsJsonResponse(ret); resp->setStatusCode(drogon::k200OK); callback(resp); - } else if (model_config.engine == kOpenAiEngine || - model_config.engine == kAnthropicEngine) { - config::RemoteModelConfig remote_model_config; - remote_model_config.LoadFromYamlFile( + } else if (model_config.engine == kPythonEngine) { + config::PythonModelConfig python_model_config; + python_model_config.ReadFromYaml( fmu::ToAbsoluteCortexDataPath( fs::path(model_entry.value().path_to_model_yaml)) .string()); - ret = remote_model_config.ToJson(); - ret["id"] = remote_model_config.model; + ret = python_model_config.ToJson(); + ret["id"] = python_model_config.model; ret["object"] = "model"; ret["result"] = "OK"; auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); resp->setStatusCode(k200OK); callback(resp); } else { - config::PythonModelConfig python_model_config; - python_model_config.ReadFromYaml( + config::RemoteModelConfig remote_model_config; + remote_model_config.LoadFromYamlFile( fmu::ToAbsoluteCortexDataPath( fs::path(model_entry.value().path_to_model_yaml)) .string()); - ret = python_model_config.ToJson(); - ret["id"] = python_model_config.model; + ret = remote_model_config.ToJson(); + ret["id"] = remote_model_config.model; ret["object"] = "model"; ret["result"] = "OK"; auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index 12c25a76c..ddf6784e8 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -16,9 +16,7 @@ static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, return size * nmemb; } -PythonEngine::PythonEngine() { - curl_global_init(CURL_GLOBAL_ALL); -} +PythonEngine::PythonEngine() {} PythonEngine::~PythonEngine() { curl_global_cleanup(); @@ -172,69 +170,33 @@ bool PythonEngine::TerminateModelProcess(const std::string& model) { CurlResponse PythonEngine::MakeGetRequest(const std::string& model, const std::string& path) { auto config = models_[model]; - CURL* curl = curl_easy_init(); - CurlResponse response; - - if (!curl) { - response.error = true; - response.error_message = "Failed to initialize CURL"; - return response; - } - std::string full_url = "http://localhost:" + config.port + path; + CurlResponse response; - struct curl_slist* headers = nullptr; - - headers = curl_slist_append(headers, "Content-Type: application/json"); - - curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); - - std::string response_string; - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); - - CURLcode res = curl_easy_perform(curl); - if (res != CURLE_OK) { + auto result = curl_utils::SimpleRequest(full_url, RequestType::GET); + if (result.has_error()) { response.error = true; - response.error_message = curl_easy_strerror(res); + response.error_message = result.error(); } else { - response.body = response_string; + response.body = result.value(); } - - curl_slist_free_all(headers); - curl_easy_cleanup(curl); 
return response; } CurlResponse PythonEngine::MakeDeleteRequest(const std::string& model, const std::string& path) { auto config = models_[model]; - CURL* curl = curl_easy_init(); - CurlResponse response; - - if (!curl) { - response.error = true; - response.error_message = "Failed to initialize CURL"; - return response; - } std::string full_url = "http://localhost:" + config.port + path; + CurlResponse response; - curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); - curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE"); - - std::string response_string; - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); + auto result = curl_utils::SimpleRequest(full_url, RequestType::DEL); - CURLcode res = curl_easy_perform(curl); - if (res != CURLE_OK) { + if (result.has_error()) { response.error = true; - response.error_message = curl_easy_strerror(res); + response.error_message = result.error(); } else { - response.body = response_string; + response.body = result.value(); } - curl_easy_cleanup(curl); return response; } @@ -242,38 +204,17 @@ CurlResponse PythonEngine::MakePostRequest(const std::string& model, const std::string& path, const std::string& body) { auto config = models_[model]; - CURL* curl = curl_easy_init(); - CurlResponse response; - - if (!curl) { - response.error = true; - response.error_message = "Failed to initialize CURL"; - return response; - } std::string full_url = "http://localhost:" + config.port + path; - struct curl_slist* headers = nullptr; - headers = curl_slist_append(headers, "Content-Type: application/json"); - - curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); - - curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str()); - - std::string response_string; - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); + CurlResponse response; + auto result = curl_utils::SimpleRequest(full_url, RequestType::POST, body); - CURLcode res = curl_easy_perform(curl); - if (res != CURLE_OK) { + if (result.has_error()) { response.error = true; - response.error_message = curl_easy_strerror(res); + response.error_message = result.error(); } else { - response.body = response_string; + response.body = result.value(); } - - curl_slist_free_all(headers); - curl_easy_cleanup(curl); return response; } diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index f862d0ed0..7b112f435 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -12,6 +12,8 @@ #include "extensions/template_renderer.h" #include "utils/file_logger.h" #include "utils/file_manager_utils.h" + +#include "utils/curl_utils.h" #ifdef _WIN32 #include #include diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc index 9c7137168..d855c8f61 100644 --- a/engine/services/download_service.cc +++ b/engine/services/download_service.cc @@ -374,9 +374,6 @@ void DownloadService::ProcessTask(DownloadTask& task, int worker_id) { CTL_ERR("Failed to init curl!"); return; } - if (!std::filesystem::exists(item.localPath.parent_path())) { - std::filesystem::create_directories(item.localPath.parent_path()); - } auto file = fopen(item.localPath.string().c_str(), "wb"); if (!file) { CTL_ERR("Failed to open output file " + item.localPath.string()); diff --git a/engine/services/model_service.cc 
b/engine/services/model_service.cc index d714c6b23..c7925360b 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -103,7 +103,7 @@ cpp::result GetDownloadTask( .pathParams = {"api", "models", "cortexso", modelId, "tree", branch}, }; - auto result = curl_utils::SimpleGetJson(url.ToFullPath()); + auto result = curl_utils::SimpleGetJsonRecursive(url.ToFullPath()); if (result.has_error()) { return cpp::fail("Model " + modelId + " not found"); } @@ -126,6 +126,9 @@ cpp::result GetDownloadTask( .pathParams = {"cortexso", modelId, "resolve", branch, path}}; auto local_path = model_container_path / path; + if (!std::filesystem::exists(local_path.parent_path())) { + std::filesystem::create_directories(local_path.parent_path()); + } download_items.push_back( DownloadItem{.id = path, .downloadUrl = download_url.ToFullPath(), diff --git a/engine/utils/curl_utils.cc b/engine/utils/curl_utils.cc index 58a00b71a..be82b5cfa 100644 --- a/engine/utils/curl_utils.cc +++ b/engine/utils/curl_utils.cc @@ -242,8 +242,8 @@ cpp::result ReadRemoteYaml(const std::string& url) { } } -cpp::result SimpleGetJson( - const std::string& url, const int timeout, bool recursive) { +cpp::result SimpleGetJson(const std::string& url, + const int timeout) { auto result = SimpleGet(url, timeout); if (result.has_error()) { CTL_ERR("Failed to get JSON from " + url + ": " + result.error()); @@ -257,11 +257,22 @@ cpp::result SimpleGetJson( " parsing error: " + reader.getFormattedErrorMessages()); } - if (root.isArray() && recursive) { + return root; +} + +cpp::result SimpleGetJsonRecursive( + const std::string& url, const int timeout) { + auto result = SimpleGetJson(url, timeout); + if (result.has_error()) { + return result; + } + auto root = result.value(); + + if (root.isArray()) { for (const auto& value : root) { if (value["type"].asString() == "directory") { - auto temp = - SimpleGetJson(url + "/" + value["path"].asString(), timeout, recursive); + auto temp = SimpleGetJsonRecursive(url + "/" + value["path"].asString(), + timeout); if (!temp.has_error()) { if (temp.value().isArray()) { for (const auto& item : temp.value()) { diff --git a/engine/utils/curl_utils.h b/engine/utils/curl_utils.h index 84b8dbddb..f33b7e8e5 100644 --- a/engine/utils/curl_utils.h +++ b/engine/utils/curl_utils.h @@ -33,8 +33,9 @@ cpp::result ReadRemoteYaml(const std::string& url); * [timeout] is an optional parameter that specifies the timeout for the request. In second. 
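As a usage sketch of the recursive variant split out above (repository URL illustrative; the function descends into `type == "directory"` entries and flattens the result, as the implementation shows):

```cpp
#include <iostream>

#include "utils/curl_utils.h"

int main() {
  // Walk a HuggingFace-style tree listing, as GetDownloadTask now does
  auto res = curl_utils::SimpleGetJsonRecursive(
      "https://huggingface.co/api/models/cortexso/tinyllama/tree/main");
  if (res.has_error()) {
    std::cerr << res.error() << "\n";
    return 1;
  }
  for (const auto& item : res.value()) {
    std::cout << item["path"].asString() << "\n";
  }
  return 0;
}
```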
*/ cpp::result SimpleGetJson(const std::string& url, - const int timeout = -1, - bool recursive = true); + const int timeout = -1); +cpp::result SimpleGetJsonRecursive(const std::string& url, + const int timeout = -1); cpp::result SimplePostJson( const std::string& url, const std::string& body = ""); From 4b2f1fc61fe7ba171ea5546693e85b5e97dc40df Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Tue, 31 Dec 2024 09:32:07 +0700 Subject: [PATCH 33/34] fix comment --- engine/controllers/server.cc | 2 +- engine/extensions/python-engine/python_engine.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index 9452935d2..8369f4713 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -184,7 +184,7 @@ void server::RouteRequest( callback(resp); return; } - bool is_stream = + auto is_stream = (*json_body).get("stream", false).asBool() || (*json_body).get("body", Json::Value()).get("stream", false).asBool(); LOG_TRACE << "Wait to route request"; diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h index 8b0e7b55f..866248b4f 100644 --- a/engine/extensions/python-engine/python_engine.h +++ b/engine/extensions/python-engine/python_engine.h @@ -44,7 +44,7 @@ static size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, while ((pos = context->buffer.find('\n')) != std::string::npos) { std::string line = context->buffer.substr(0, pos); context->buffer = context->buffer.substr(pos + 1); - LOG_INFO << "line: "< Date: Tue, 31 Dec 2024 10:44:33 +0700 Subject: [PATCH 34/34] fix comment --- engine/controllers/server.cc | 14 ++------------ engine/extensions/python-engine/python_engine.cc | 14 +++++--------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index 8369f4713..fa02c61d2 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -158,12 +158,7 @@ void server::Inference(const HttpRequestPtr& req, }(); ProcessStreamRes(callback, q, engine_type, model_id); } else { - auto [status, res] = q->wait_and_pop(); - LOG_DEBUG << "response: " << res.toStyledString(); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); - resp->setStatusCode( - static_cast(status["status_code"].asInt())); - callback(resp); + ProcessNonStreamRes(callback, *q); LOG_TRACE << "Done inference"; } } @@ -200,12 +195,7 @@ void server::RouteRequest( }(); ProcessStreamRes(callback, q, engine_type, model_id); } else { - auto [status, res] = q->wait_and_pop(); - LOG_DEBUG << "response: " << res.toStyledString(); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); - resp->setStatusCode( - static_cast(status["status_code"].asInt())); - callback(resp); + ProcessNonStreamRes(callback, *q); LOG_TRACE << "Done route request"; } } diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc index 84a581fda..f0d325055 100644 --- a/engine/extensions/python-engine/python_engine.cc +++ b/engine/extensions/python-engine/python_engine.cc @@ -16,11 +16,7 @@ static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, return size * nmemb; } - -PythonEngine::PythonEngine():q_(4 /*n_parallel*/, "python_engine") { -} - - +PythonEngine::PythonEngine() : q_(4 /*n_parallel*/, "python_engine") {} PythonEngine::~PythonEngine() { curl_global_cleanup(); @@ -173,7 +169,7 @@ bool PythonEngine::TerminateModelProcess(const 
std::string& model) { } CurlResponse PythonEngine::MakeGetRequest(const std::string& model, const std::string& path) { - auto config = models_[model]; + auto const& config = models_[model]; std::string full_url = "http://localhost:" + config.port + path; CurlResponse response; @@ -188,7 +184,7 @@ CurlResponse PythonEngine::MakeGetRequest(const std::string& model, } CurlResponse PythonEngine::MakeDeleteRequest(const std::string& model, const std::string& path) { - auto config = models_[model]; + auto const& config = models_[model]; std::string full_url = "http://localhost:" + config.port + path; CurlResponse response; @@ -207,7 +203,7 @@ CurlResponse PythonEngine::MakeDeleteRequest(const std::string& model, CurlResponse PythonEngine::MakePostRequest(const std::string& model, const std::string& path, const std::string& body) { - auto config = models_[model]; + auto const& config = models_[model]; std::string full_url = "http://localhost:" + config.port + path; CurlResponse response; @@ -457,7 +453,7 @@ void PythonEngine::HandleChatCompletion( CurlResponse PythonEngine::MakeStreamPostRequest( const std::string& model, const std::string& path, const std::string& body, const std::function& callback) { - auto config = models_[model]; + auto const& config = models_[model]; CURL* curl = curl_easy_init(); CurlResponse response;