From 9d2b02d842faa9e141a4eee8a06d88296837363b Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Mon, 26 Aug 2024 11:03:24 +0700 Subject: [PATCH 01/16] refactor: rename cortex-cpp to engine --- .gitignore | 1 + .gitmodules | 3 ++ {cortex-cpp => engine}/.clang-format | 0 {cortex-cpp => engine}/.gitignore | 0 {cortex-cpp => engine}/CMakeLists.txt | 30 +++++++++++++++--- {cortex-cpp => engine}/CONTRIBUTING.md | 0 {cortex-cpp => engine}/LICENSE | 0 {cortex-cpp => engine}/Makefile | 0 {cortex-cpp => engine}/README.md | 0 {cortex-cpp => engine}/addon.cc | 0 {cortex-cpp => engine}/audio.md | 0 {cortex-cpp => engine}/binding/index.d.ts | 0 {cortex-cpp => engine}/binding/index.js | 0 {cortex-cpp => engine}/common/base.h | 0 {cortex-cpp => engine}/controllers/health.cc | 0 {cortex-cpp => engine}/controllers/health.h | 0 .../controllers/prelight.cc | 0 {cortex-cpp => engine}/controllers/prelight.h | 0 .../controllers/processManager.cc | 0 .../controllers/processManager.h | 0 {cortex-cpp => engine}/controllers/server.cc | 0 {cortex-cpp => engine}/controllers/server.h | 0 .../cortex-common/EngineI.h | 0 .../cortex-common/cortexpythoni.h | 0 .../cortex-cpp-deps/.gitignore | 0 .../cortex-cpp-deps/CMakeLists.txt | 0 .../cortex-cpp-deps/README.md | 0 .../examples/example-docker/Dockerfile | 0 .../examples/example-docker/alpine.Dockerfile | 0 .../examples/example-docker/cuda.Dockerfile | 0 .../examples/grammars/json.gbnf | 0 .../examples/interface/README.md | 0 .../examples/interface/app.py | 0 .../examples/interface/avatar.png | Bin {cortex-cpp => engine}/install.bat | 0 {cortex-cpp => engine}/install.sh | 0 {cortex-cpp => engine}/install_deps.sh | 0 {cortex-cpp => engine}/main.cc | 0 {cortex-cpp => engine}/package.json | 0 {cortex-cpp => engine}/test/CMakeLists.txt | 0 .../test/components/CMakeLists.txt | 0 .../test/components/main.cc | 0 .../test/components/test_cortex_utils.cc | 0 {cortex-cpp => engine}/utils/cortex_utils.h | 0 .../utils/cpuid/cpu_info.cc | 0 {cortex-cpp => engine}/utils/cpuid/cpu_info.h | 0 .../utils/cpuid/detail/cpu_info_impl.h | 0 .../utils/cpuid/detail/extract_x86_flags.h | 0 .../utils/cpuid/detail/init_gcc_x86.h | 0 .../utils/cpuid/detail/init_ios_clang_arm.h | 0 .../utils/cpuid/detail/init_linux_gcc_arm.h | 0 .../utils/cpuid/detail/init_msvc_arm.h | 0 .../utils/cpuid/detail/init_msvc_x86.h | 0 .../utils/cpuid/detail/init_unknown.h | 0 {cortex-cpp => engine}/utils/cpuid/platform.h | 0 {cortex-cpp => engine}/utils/dr_wav.h | 0 {cortex-cpp => engine}/utils/dylib.h | 0 {cortex-cpp => engine}/utils/json.hpp | 0 {cortex-cpp => engine}/utils/logging_utils.h | 0 engine/vcpkg | 1 + engine/vcpkg-configuration.json | 14 ++++++++ engine/vcpkg.json | 18 +++++++++++ 62 files changed, 63 insertions(+), 4 deletions(-) rename {cortex-cpp => engine}/.clang-format (100%) rename {cortex-cpp => engine}/.gitignore (100%) rename {cortex-cpp => engine}/CMakeLists.txt (80%) rename {cortex-cpp => engine}/CONTRIBUTING.md (100%) rename {cortex-cpp => engine}/LICENSE (100%) rename {cortex-cpp => engine}/Makefile (100%) rename {cortex-cpp => engine}/README.md (100%) rename {cortex-cpp => engine}/addon.cc (100%) rename {cortex-cpp => engine}/audio.md (100%) rename {cortex-cpp => engine}/binding/index.d.ts (100%) rename {cortex-cpp => engine}/binding/index.js (100%) rename {cortex-cpp => engine}/common/base.h (100%) rename {cortex-cpp => engine}/controllers/health.cc (100%) rename {cortex-cpp => engine}/controllers/health.h (100%) rename {cortex-cpp => engine}/controllers/prelight.cc (100%) rename {cortex-cpp => 
engine}/controllers/prelight.h (100%) rename {cortex-cpp => engine}/controllers/processManager.cc (100%) rename {cortex-cpp => engine}/controllers/processManager.h (100%) rename {cortex-cpp => engine}/controllers/server.cc (100%) rename {cortex-cpp => engine}/controllers/server.h (100%) rename {cortex-cpp => engine}/cortex-common/EngineI.h (100%) rename {cortex-cpp => engine}/cortex-common/cortexpythoni.h (100%) rename {cortex-cpp => engine}/cortex-cpp-deps/.gitignore (100%) rename {cortex-cpp => engine}/cortex-cpp-deps/CMakeLists.txt (100%) rename {cortex-cpp => engine}/cortex-cpp-deps/README.md (100%) rename {cortex-cpp => engine}/examples/example-docker/Dockerfile (100%) rename {cortex-cpp => engine}/examples/example-docker/alpine.Dockerfile (100%) rename {cortex-cpp => engine}/examples/example-docker/cuda.Dockerfile (100%) rename {cortex-cpp => engine}/examples/grammars/json.gbnf (100%) rename {cortex-cpp => engine}/examples/interface/README.md (100%) rename {cortex-cpp => engine}/examples/interface/app.py (100%) rename {cortex-cpp => engine}/examples/interface/avatar.png (100%) rename {cortex-cpp => engine}/install.bat (100%) rename {cortex-cpp => engine}/install.sh (100%) rename {cortex-cpp => engine}/install_deps.sh (100%) mode change 100755 => 100644 rename {cortex-cpp => engine}/main.cc (100%) rename {cortex-cpp => engine}/package.json (100%) rename {cortex-cpp => engine}/test/CMakeLists.txt (100%) rename {cortex-cpp => engine}/test/components/CMakeLists.txt (100%) rename {cortex-cpp => engine}/test/components/main.cc (100%) rename {cortex-cpp => engine}/test/components/test_cortex_utils.cc (100%) rename {cortex-cpp => engine}/utils/cortex_utils.h (100%) rename {cortex-cpp => engine}/utils/cpuid/cpu_info.cc (100%) rename {cortex-cpp => engine}/utils/cpuid/cpu_info.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/cpu_info_impl.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/extract_x86_flags.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/init_gcc_x86.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/init_ios_clang_arm.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/init_linux_gcc_arm.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/init_msvc_arm.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/init_msvc_x86.h (100%) rename {cortex-cpp => engine}/utils/cpuid/detail/init_unknown.h (100%) rename {cortex-cpp => engine}/utils/cpuid/platform.h (100%) rename {cortex-cpp => engine}/utils/dr_wav.h (100%) rename {cortex-cpp => engine}/utils/dylib.h (100%) rename {cortex-cpp => engine}/utils/json.hpp (100%) rename {cortex-cpp => engine}/utils/logging_utils.h (100%) create mode 160000 engine/vcpkg create mode 100644 engine/vcpkg-configuration.json create mode 100644 engine/vcpkg.json diff --git a/.gitignore b/.gitignore index d3c4ef22b..237fb5b33 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ cortex-js/package-lock.json .vscode cortex-js/command cortex-js/src/infrastructure/commanders/test/test_data +**/vcpkg_installed \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index e69de29bb..da05bcdd8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "engine/vcpkg"] + path = engine/vcpkg + url = https://github.com/microsoft/vcpkg.git diff --git a/cortex-cpp/.clang-format b/engine/.clang-format similarity index 100% rename from cortex-cpp/.clang-format rename to engine/.clang-format diff --git a/cortex-cpp/.gitignore b/engine/.gitignore similarity index 100% rename from 
cortex-cpp/.gitignore
rename to engine/.gitignore
diff --git a/cortex-cpp/CMakeLists.txt b/engine/CMakeLists.txt
similarity index 80%
rename from cortex-cpp/CMakeLists.txt
rename to engine/CMakeLists.txt
index a53e9fa70..46ea2c633 100644
--- a/cortex-cpp/CMakeLists.txt
+++ b/engine/CMakeLists.txt
@@ -33,9 +33,17 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 set(OPENSSL_USE_STATIC_LIBS TRUE)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build-deps/_install)
+# set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build-deps/_install) # This is the critical line for installing another package
+if(MSVC)
+  add_compile_options(
+    $<$<CONFIG:>:/MT> #---------|
+    $<$<CONFIG:Debug>:/MTd> #---|-- Statically link the runtime libraries
+    $<$<CONFIG:Release>:/MT> #--|
+  )
+endif()
+
 if(LLAMA_CUDA)
   cmake_minimum_required(VERSION 3.17)
@@ -75,6 +83,16 @@ add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}")
 # add_subdirectory(test)
+find_package(jsoncpp CONFIG REQUIRED)
+find_package(Drogon CONFIG REQUIRED)
+find_package(yaml-cpp CONFIG REQUIRED)
+find_package(jinja2cpp CONFIG REQUIRED)
+find_package(httplib CONFIG REQUIRED)
+find_package(nlohmann_json CONFIG REQUIRED)
+find_package(CLI11 CONFIG REQUIRED)
+find_package(unofficial-minizip CONFIG REQUIRED)
+find_package(LibArchive REQUIRED)
+
 # Build using CMAKE-JS
 if(DEFINED CMAKE_JS_INC)
   if(("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") OR("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU"))
@@ -109,8 +127,12 @@ endif()
 #
 # and comment out the following lines
-find_package(Drogon CONFIG REQUIRED)
-
+target_link_libraries(${PROJECT_NAME} PRIVATE httplib::httplib)
+target_link_libraries(${PROJECT_NAME} PRIVATE nlohmann_json::nlohmann_json)
+target_link_libraries(${PROJECT_NAME} PRIVATE jinja2cpp)
+target_link_libraries(${PROJECT_NAME} PRIVATE CLI11::CLI11)
+target_link_libraries(${PROJECT_NAME} PRIVATE unofficial::minizip::minizip)
+target_link_libraries(${PROJECT_NAME} PRIVATE LibArchive::LibArchive)

 # Build using CMAKE-JS
 if(DEFINED CMAKE_JS_INC)
@@ -124,7 +146,7 @@ if(DEFINED CMAKE_JS_INC)
     execute_process(COMMAND ${CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS})
   endif()
 else()
-  target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon
+  target_link_libraries(${PROJECT_NAME} PRIVATE JsonCpp::JsonCpp Drogon::Drogon
                         OpenSSL::SSL OpenSSL::Crypto yaml-cpp::yaml-cpp
                         ${CMAKE_THREAD_LIBS_INIT})
 endif()
 # ##############################################################################
diff --git a/cortex-cpp/CONTRIBUTING.md b/engine/CONTRIBUTING.md
similarity index 100%
rename from cortex-cpp/CONTRIBUTING.md
rename to engine/CONTRIBUTING.md
diff --git a/cortex-cpp/LICENSE b/engine/LICENSE
similarity index 100%
rename from cortex-cpp/LICENSE
rename to engine/LICENSE
diff --git a/cortex-cpp/Makefile b/engine/Makefile
similarity index 100%
rename from cortex-cpp/Makefile
rename to engine/Makefile
diff --git a/cortex-cpp/README.md b/engine/README.md
similarity index 100%
rename from cortex-cpp/README.md
rename to engine/README.md
diff --git a/cortex-cpp/addon.cc b/engine/addon.cc
similarity index 100%
rename from cortex-cpp/addon.cc
rename to engine/addon.cc
diff --git a/cortex-cpp/audio.md b/engine/audio.md
similarity index 100%
rename from cortex-cpp/audio.md
rename to engine/audio.md
diff --git a/cortex-cpp/binding/index.d.ts b/engine/binding/index.d.ts
similarity index 100%
rename from cortex-cpp/binding/index.d.ts
rename to engine/binding/index.d.ts
diff --git
a/cortex-cpp/binding/index.js b/engine/binding/index.js similarity index 100% rename from cortex-cpp/binding/index.js rename to engine/binding/index.js diff --git a/cortex-cpp/common/base.h b/engine/common/base.h similarity index 100% rename from cortex-cpp/common/base.h rename to engine/common/base.h diff --git a/cortex-cpp/controllers/health.cc b/engine/controllers/health.cc similarity index 100% rename from cortex-cpp/controllers/health.cc rename to engine/controllers/health.cc diff --git a/cortex-cpp/controllers/health.h b/engine/controllers/health.h similarity index 100% rename from cortex-cpp/controllers/health.h rename to engine/controllers/health.h diff --git a/cortex-cpp/controllers/prelight.cc b/engine/controllers/prelight.cc similarity index 100% rename from cortex-cpp/controllers/prelight.cc rename to engine/controllers/prelight.cc diff --git a/cortex-cpp/controllers/prelight.h b/engine/controllers/prelight.h similarity index 100% rename from cortex-cpp/controllers/prelight.h rename to engine/controllers/prelight.h diff --git a/cortex-cpp/controllers/processManager.cc b/engine/controllers/processManager.cc similarity index 100% rename from cortex-cpp/controllers/processManager.cc rename to engine/controllers/processManager.cc diff --git a/cortex-cpp/controllers/processManager.h b/engine/controllers/processManager.h similarity index 100% rename from cortex-cpp/controllers/processManager.h rename to engine/controllers/processManager.h diff --git a/cortex-cpp/controllers/server.cc b/engine/controllers/server.cc similarity index 100% rename from cortex-cpp/controllers/server.cc rename to engine/controllers/server.cc diff --git a/cortex-cpp/controllers/server.h b/engine/controllers/server.h similarity index 100% rename from cortex-cpp/controllers/server.h rename to engine/controllers/server.h diff --git a/cortex-cpp/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h similarity index 100% rename from cortex-cpp/cortex-common/EngineI.h rename to engine/cortex-common/EngineI.h diff --git a/cortex-cpp/cortex-common/cortexpythoni.h b/engine/cortex-common/cortexpythoni.h similarity index 100% rename from cortex-cpp/cortex-common/cortexpythoni.h rename to engine/cortex-common/cortexpythoni.h diff --git a/cortex-cpp/cortex-cpp-deps/.gitignore b/engine/cortex-cpp-deps/.gitignore similarity index 100% rename from cortex-cpp/cortex-cpp-deps/.gitignore rename to engine/cortex-cpp-deps/.gitignore diff --git a/cortex-cpp/cortex-cpp-deps/CMakeLists.txt b/engine/cortex-cpp-deps/CMakeLists.txt similarity index 100% rename from cortex-cpp/cortex-cpp-deps/CMakeLists.txt rename to engine/cortex-cpp-deps/CMakeLists.txt diff --git a/cortex-cpp/cortex-cpp-deps/README.md b/engine/cortex-cpp-deps/README.md similarity index 100% rename from cortex-cpp/cortex-cpp-deps/README.md rename to engine/cortex-cpp-deps/README.md diff --git a/cortex-cpp/examples/example-docker/Dockerfile b/engine/examples/example-docker/Dockerfile similarity index 100% rename from cortex-cpp/examples/example-docker/Dockerfile rename to engine/examples/example-docker/Dockerfile diff --git a/cortex-cpp/examples/example-docker/alpine.Dockerfile b/engine/examples/example-docker/alpine.Dockerfile similarity index 100% rename from cortex-cpp/examples/example-docker/alpine.Dockerfile rename to engine/examples/example-docker/alpine.Dockerfile diff --git a/cortex-cpp/examples/example-docker/cuda.Dockerfile b/engine/examples/example-docker/cuda.Dockerfile similarity index 100% rename from cortex-cpp/examples/example-docker/cuda.Dockerfile 
rename to engine/examples/example-docker/cuda.Dockerfile diff --git a/cortex-cpp/examples/grammars/json.gbnf b/engine/examples/grammars/json.gbnf similarity index 100% rename from cortex-cpp/examples/grammars/json.gbnf rename to engine/examples/grammars/json.gbnf diff --git a/cortex-cpp/examples/interface/README.md b/engine/examples/interface/README.md similarity index 100% rename from cortex-cpp/examples/interface/README.md rename to engine/examples/interface/README.md diff --git a/cortex-cpp/examples/interface/app.py b/engine/examples/interface/app.py similarity index 100% rename from cortex-cpp/examples/interface/app.py rename to engine/examples/interface/app.py diff --git a/cortex-cpp/examples/interface/avatar.png b/engine/examples/interface/avatar.png similarity index 100% rename from cortex-cpp/examples/interface/avatar.png rename to engine/examples/interface/avatar.png diff --git a/cortex-cpp/install.bat b/engine/install.bat similarity index 100% rename from cortex-cpp/install.bat rename to engine/install.bat diff --git a/cortex-cpp/install.sh b/engine/install.sh similarity index 100% rename from cortex-cpp/install.sh rename to engine/install.sh diff --git a/cortex-cpp/install_deps.sh b/engine/install_deps.sh old mode 100755 new mode 100644 similarity index 100% rename from cortex-cpp/install_deps.sh rename to engine/install_deps.sh diff --git a/cortex-cpp/main.cc b/engine/main.cc similarity index 100% rename from cortex-cpp/main.cc rename to engine/main.cc diff --git a/cortex-cpp/package.json b/engine/package.json similarity index 100% rename from cortex-cpp/package.json rename to engine/package.json diff --git a/cortex-cpp/test/CMakeLists.txt b/engine/test/CMakeLists.txt similarity index 100% rename from cortex-cpp/test/CMakeLists.txt rename to engine/test/CMakeLists.txt diff --git a/cortex-cpp/test/components/CMakeLists.txt b/engine/test/components/CMakeLists.txt similarity index 100% rename from cortex-cpp/test/components/CMakeLists.txt rename to engine/test/components/CMakeLists.txt diff --git a/cortex-cpp/test/components/main.cc b/engine/test/components/main.cc similarity index 100% rename from cortex-cpp/test/components/main.cc rename to engine/test/components/main.cc diff --git a/cortex-cpp/test/components/test_cortex_utils.cc b/engine/test/components/test_cortex_utils.cc similarity index 100% rename from cortex-cpp/test/components/test_cortex_utils.cc rename to engine/test/components/test_cortex_utils.cc diff --git a/cortex-cpp/utils/cortex_utils.h b/engine/utils/cortex_utils.h similarity index 100% rename from cortex-cpp/utils/cortex_utils.h rename to engine/utils/cortex_utils.h diff --git a/cortex-cpp/utils/cpuid/cpu_info.cc b/engine/utils/cpuid/cpu_info.cc similarity index 100% rename from cortex-cpp/utils/cpuid/cpu_info.cc rename to engine/utils/cpuid/cpu_info.cc diff --git a/cortex-cpp/utils/cpuid/cpu_info.h b/engine/utils/cpuid/cpu_info.h similarity index 100% rename from cortex-cpp/utils/cpuid/cpu_info.h rename to engine/utils/cpuid/cpu_info.h diff --git a/cortex-cpp/utils/cpuid/detail/cpu_info_impl.h b/engine/utils/cpuid/detail/cpu_info_impl.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/cpu_info_impl.h rename to engine/utils/cpuid/detail/cpu_info_impl.h diff --git a/cortex-cpp/utils/cpuid/detail/extract_x86_flags.h b/engine/utils/cpuid/detail/extract_x86_flags.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/extract_x86_flags.h rename to engine/utils/cpuid/detail/extract_x86_flags.h diff --git 
a/cortex-cpp/utils/cpuid/detail/init_gcc_x86.h b/engine/utils/cpuid/detail/init_gcc_x86.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/init_gcc_x86.h rename to engine/utils/cpuid/detail/init_gcc_x86.h diff --git a/cortex-cpp/utils/cpuid/detail/init_ios_clang_arm.h b/engine/utils/cpuid/detail/init_ios_clang_arm.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/init_ios_clang_arm.h rename to engine/utils/cpuid/detail/init_ios_clang_arm.h diff --git a/cortex-cpp/utils/cpuid/detail/init_linux_gcc_arm.h b/engine/utils/cpuid/detail/init_linux_gcc_arm.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/init_linux_gcc_arm.h rename to engine/utils/cpuid/detail/init_linux_gcc_arm.h diff --git a/cortex-cpp/utils/cpuid/detail/init_msvc_arm.h b/engine/utils/cpuid/detail/init_msvc_arm.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/init_msvc_arm.h rename to engine/utils/cpuid/detail/init_msvc_arm.h diff --git a/cortex-cpp/utils/cpuid/detail/init_msvc_x86.h b/engine/utils/cpuid/detail/init_msvc_x86.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/init_msvc_x86.h rename to engine/utils/cpuid/detail/init_msvc_x86.h diff --git a/cortex-cpp/utils/cpuid/detail/init_unknown.h b/engine/utils/cpuid/detail/init_unknown.h similarity index 100% rename from cortex-cpp/utils/cpuid/detail/init_unknown.h rename to engine/utils/cpuid/detail/init_unknown.h diff --git a/cortex-cpp/utils/cpuid/platform.h b/engine/utils/cpuid/platform.h similarity index 100% rename from cortex-cpp/utils/cpuid/platform.h rename to engine/utils/cpuid/platform.h diff --git a/cortex-cpp/utils/dr_wav.h b/engine/utils/dr_wav.h similarity index 100% rename from cortex-cpp/utils/dr_wav.h rename to engine/utils/dr_wav.h diff --git a/cortex-cpp/utils/dylib.h b/engine/utils/dylib.h similarity index 100% rename from cortex-cpp/utils/dylib.h rename to engine/utils/dylib.h diff --git a/cortex-cpp/utils/json.hpp b/engine/utils/json.hpp similarity index 100% rename from cortex-cpp/utils/json.hpp rename to engine/utils/json.hpp diff --git a/cortex-cpp/utils/logging_utils.h b/engine/utils/logging_utils.h similarity index 100% rename from cortex-cpp/utils/logging_utils.h rename to engine/utils/logging_utils.h diff --git a/engine/vcpkg b/engine/vcpkg new file mode 160000 index 000000000..fb544875b --- /dev/null +++ b/engine/vcpkg @@ -0,0 +1 @@ +Subproject commit fb544875b93bffebe96c6f720000003234cfba08 diff --git a/engine/vcpkg-configuration.json b/engine/vcpkg-configuration.json new file mode 100644 index 000000000..c88ae390d --- /dev/null +++ b/engine/vcpkg-configuration.json @@ -0,0 +1,14 @@ +{ + "default-registry": { + "kind": "git", + "baseline": "a76e5d9e1c62a23b9e92353e5e25d8c34cda2b74", + "repository": "https://github.com/Cheaterdev/vcpkg" + }, + "registries": [ + { + "kind": "artifact", + "location": "https://github.com/microsoft/vcpkg-ce-catalog/archive/refs/heads/main.zip", + "name": "microsoft" + } + ] + } \ No newline at end of file diff --git a/engine/vcpkg.json b/engine/vcpkg.json new file mode 100644 index 000000000..82118eb17 --- /dev/null +++ b/engine/vcpkg.json @@ -0,0 +1,18 @@ +{ + "dependencies": [ + "cli11", + { + "name": "cpp-httplib", + "features": [ + "openssl" + ] + }, + "drogon", + "jinja2cpp", + "jsoncpp", + "minizip", + "nlohmann-json", + "yaml-cpp", + "libarchive" + ] + } \ No newline at end of file From 1f80ff75a500f6dfd576d3abaf76b23974dc4e23 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Mon, 26 Aug 2024 11:29:01 +0700 Subject: [PATCH 02/16] 
refactor: import cortex.cpp code --- engine/CMakeLists.txt | 18 +- engine/commands/model_pull_cmd.cc | 22 + engine/commands/model_pull_cmd.h | 15 + engine/commands/start_model_cmd.cc | 43 ++ engine/commands/start_model_cmd.h | 18 + engine/commands/stop_model_cmd.cc | 31 ++ engine/commands/stop_model_cmd.h | 18 + engine/commands/stop_server_cmd.cc | 20 + engine/commands/stop_server_cmd.h | 15 + engine/config/gguf_parser.cc | 582 ++++++++++++++++++++++ engine/config/gguf_parser.h | 71 +++ engine/config/model_config.h | 40 ++ engine/config/yaml_config.cc | 212 ++++++++ engine/config/yaml_config.h | 32 ++ engine/controllers/command_line_parser.cc | 93 ++++ engine/controllers/command_line_parser.h | 13 + engine/services/download_service.cc | 111 +++++ engine/services/download_service.h | 73 +++ engine/utils/archive_utils.h | 146 ++++++ engine/utils/cortexso_parser.h | 69 +++ engine/utils/http_util.h | 24 + 21 files changed, 1653 insertions(+), 13 deletions(-) create mode 100644 engine/commands/model_pull_cmd.cc create mode 100644 engine/commands/model_pull_cmd.h create mode 100644 engine/commands/start_model_cmd.cc create mode 100644 engine/commands/start_model_cmd.h create mode 100644 engine/commands/stop_model_cmd.cc create mode 100644 engine/commands/stop_model_cmd.h create mode 100644 engine/commands/stop_server_cmd.cc create mode 100644 engine/commands/stop_server_cmd.h create mode 100644 engine/config/gguf_parser.cc create mode 100644 engine/config/gguf_parser.h create mode 100644 engine/config/model_config.h create mode 100644 engine/config/yaml_config.cc create mode 100644 engine/config/yaml_config.h create mode 100644 engine/controllers/command_line_parser.cc create mode 100644 engine/controllers/command_line_parser.h create mode 100644 engine/services/download_service.cc create mode 100644 engine/services/download_service.h create mode 100644 engine/utils/archive_utils.h create mode 100644 engine/utils/cortexso_parser.h create mode 100644 engine/utils/http_util.h diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 46ea2c633..d18d28f2d 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -163,21 +163,13 @@ else() endif() aux_source_directory(controllers CTL_SRC) +aux_source_directory(services SERVICES_SRC) aux_source_directory(common COMMON_SRC) aux_source_directory(models MODEL_SRC) aux_source_directory(cortex-common CORTEX_COMMON) -# aux_source_directory(filters FILTER_SRC) aux_source_directory(plugins -# PLUGIN_SRC) - -# drogon_create_views(${PROJECT_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/views -# ${CMAKE_CURRENT_BINARY_DIR}) use the following line to create views with -# namespaces. 
drogon_create_views(${PROJECT_NAME}
-# ${CMAKE_CURRENT_SOURCE_DIR}/views ${CMAKE_CURRENT_BINARY_DIR} TRUE)
+aux_source_directory(config CONFIG_SRC)
+aux_source_directory(commands COMMANDS_SRC)
 target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} )
-# ${CMAKE_CURRENT_SOURCE_DIR}/models)
-target_sources(${PROJECT_NAME} PRIVATE ${CTL_SRC} ${COMMON_SRC})
-# ${FILTER_SRC} ${PLUGIN_SRC} ${MODEL_SRC})
-# ##############################################################################
-# uncomment the following line for dynamically loading views set_property(TARGET
-# ${PROJECT_NAME} PROPERTY ENABLE_EXPORTS ON)
+
+target_sources(${PROJECT_NAME} PRIVATE ${COMMANDS_SRC} ${CONFIG_SRC} ${CTL_SRC} ${COMMON_SRC} ${SERVICES_SRC})
diff --git a/engine/commands/model_pull_cmd.cc b/engine/commands/model_pull_cmd.cc
new file mode 100644
index 000000000..6135efb43
--- /dev/null
+++ b/engine/commands/model_pull_cmd.cc
@@ -0,0 +1,22 @@
+#include "model_pull_cmd.h"
+#include <iostream>
+#include "services/download_service.h"
+#include "trantor/utils/Logger.h"
+#include "utils/cortexso_parser.h"
+
+namespace commands {
+ModelPullCmd::ModelPullCmd(std::string modelHandle)
+    : modelHandle_(std::move(modelHandle)) {}
+
+void ModelPullCmd::Exec() {
+  auto downloadTask = cortexso_parser::getDownloadTask(modelHandle_);
+  if (downloadTask.has_value()) {
+    DownloadService downloadService;
+    downloadService.AddDownloadTask(downloadTask.value());
+    std::cout << "Download finished" << std::endl;
+  } else {
+    std::cout << "Model not found" << std::endl;
+  }
+}
+
+}; // namespace commands
\ No newline at end of file
diff --git a/engine/commands/model_pull_cmd.h b/engine/commands/model_pull_cmd.h
new file mode 100644
index 000000000..2c5f658f2
--- /dev/null
+++ b/engine/commands/model_pull_cmd.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <string>
+
+namespace commands {
+
+class ModelPullCmd {
+ public:
+  ModelPullCmd(std::string modelHandle);
+  void Exec();
+
+ private:
+  std::string modelHandle_;
+};
+} // namespace commands
\ No newline at end of file
diff --git a/engine/commands/start_model_cmd.cc b/engine/commands/start_model_cmd.cc
new file mode 100644
index 000000000..341ba2f9d
--- /dev/null
+++ b/engine/commands/start_model_cmd.cc
@@ -0,0 +1,43 @@
+#include "start_model_cmd.h"
+#include "httplib.h"
+#include "nlohmann/json.hpp"
+#include "trantor/utils/Logger.h"
+
+namespace commands {
+StartModelCmd::StartModelCmd(std::string host, int port,
+                             const config::ModelConfig& mc)
+    : host_(std::move(host)), port_(port), mc_(mc) {}
+
+void StartModelCmd::Exec() {
+  httplib::Client cli(host_ + ":" + std::to_string(port_));
+  nlohmann::json json_data;
+  if (mc_.files.size() > 0) {
+    // TODO(sang) support multiple files
+    json_data["model_path"] = mc_.files[0];
+  } else {
+    LOG_WARN << "model_path is empty";
+    return;
+  }
+  json_data["model"] = mc_.name;
+  json_data["system_prompt"] = mc_.system_template;
+  json_data["user_prompt"] = mc_.user_template;
+  json_data["ai_prompt"] = mc_.ai_template;
+  json_data["ctx_len"] = mc_.ctx_len;
+  json_data["stop"] = mc_.stop;
+  json_data["engine"] = mc_.engine;
+
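+  // The payload above maps the model.yml fields onto the request body of the
+  // engine's /inferences/server/loadmodel endpoint; only files[0] is sent
+  // because multi-file models are not handled yet (see the TODO above).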
+  auto data_str = json_data.dump();
+
+  auto res = cli.Post("/inferences/server/loadmodel", httplib::Headers(),
+                      data_str.data(), data_str.size(), "application/json");
+  if (res) {
+    if (res->status == httplib::StatusCode::OK_200) {
+      LOG_INFO << res->body;
+    }
+  } else {
+    auto err = res.error();
+    LOG_WARN << "HTTP error: " << httplib::to_string(err);
+  }
+}
+
+}; // namespace commands
\ No newline at end of file
diff --git a/engine/commands/start_model_cmd.h b/engine/commands/start_model_cmd.h
new file mode 100644
index 000000000..27cfc59e6
--- /dev/null
+++ b/engine/commands/start_model_cmd.h
@@ -0,0 +1,18 @@
+#pragma once
+#include <string>
+#include <vector>
+#include "config/model_config.h"
+
+namespace commands {
+
+class StartModelCmd{
+ public:
+  StartModelCmd(std::string host, int port, const config::ModelConfig& mc);
+  void Exec();
+
+ private:
+  std::string host_;
+  int port_;
+  const config::ModelConfig& mc_;
+};
+} // namespace commands
\ No newline at end of file
diff --git a/engine/commands/stop_model_cmd.cc b/engine/commands/stop_model_cmd.cc
new file mode 100644
index 000000000..628007efe
--- /dev/null
+++ b/engine/commands/stop_model_cmd.cc
@@ -0,0 +1,31 @@
+#include "stop_model_cmd.h"
+#include "httplib.h"
+#include "nlohmann/json.hpp"
+#include "trantor/utils/Logger.h"
+
+namespace commands {
+StopModelCmd::StopModelCmd(std::string host, int port,
+                           const config::ModelConfig& mc)
+    : host_(std::move(host)), port_(port), mc_(mc) {}
+
+void StopModelCmd::Exec() {
+  httplib::Client cli(host_ + ":" + std::to_string(port_));
+  nlohmann::json json_data;
+  json_data["model"] = mc_.name;
+  json_data["engine"] = mc_.engine;
+
+  auto data_str = json_data.dump();
+
+  auto res = cli.Post("/inferences/server/unloadmodel", httplib::Headers(),
+                      data_str.data(), data_str.size(), "application/json");
+  if (res) {
+    if (res->status == httplib::StatusCode::OK_200) {
+      LOG_INFO << res->body;
+    }
+  } else {
+    auto err = res.error();
+    LOG_WARN << "HTTP error: " << httplib::to_string(err);
+  }
+}
+
+}; // namespace commands
\ No newline at end of file
diff --git a/engine/commands/stop_model_cmd.h b/engine/commands/stop_model_cmd.h
new file mode 100644
index 000000000..9ead32370
--- /dev/null
+++ b/engine/commands/stop_model_cmd.h
@@ -0,0 +1,18 @@
+#pragma once
+#include <string>
+#include <vector>
+#include "config/model_config.h"
+
+namespace commands {
+
+class StopModelCmd{
+ public:
+  StopModelCmd(std::string host, int port, const config::ModelConfig& mc);
+  void Exec();
+
+ private:
+  std::string host_;
+  int port_;
+  const config::ModelConfig& mc_;
+};
+} // namespace commands
\ No newline at end of file
diff --git a/engine/commands/stop_server_cmd.cc b/engine/commands/stop_server_cmd.cc
new file mode 100644
index 000000000..cb312ef99
--- /dev/null
+++ b/engine/commands/stop_server_cmd.cc
@@ -0,0 +1,20 @@
+#include "stop_server_cmd.h"
+#include "httplib.h"
+#include "trantor/utils/Logger.h"
+
+namespace commands {
+StopServerCmd::StopServerCmd(std::string host, int port)
+    : host_(std::move(host)), port_(port) {}
+
+void StopServerCmd::Exec() {
+  httplib::Client cli(host_ + ":" + std::to_string(port_));
+  auto res = cli.Delete("/processManager/destroy");
+  if (res) {
+    LOG_INFO << res->body;
+  } else {
+    auto err = res.error();
+    LOG_WARN << "HTTP error: " << httplib::to_string(err);
+  }
+}
+
+}; // namespace commands
\ No newline at end of file
diff --git a/engine/commands/stop_server_cmd.h b/engine/commands/stop_server_cmd.h
new file mode 100644
index 000000000..03735d81c
--- /dev/null
+++ b/engine/commands/stop_server_cmd.h
@@ -0,0 +1,15 @@
+#pragma once
+#include <string>
+
+namespace commands {
+
+class StopServerCmd{
+ public:
+  StopServerCmd(std::string host, int port);
+  void Exec();
+
+ private:
+  std::string host_;
+  int port_;
+};
+} // namespace commands
\ No newline at end of file
diff --git a/engine/config/gguf_parser.cc b/engine/config/gguf_parser.cc
new file mode 100644
index 000000000..00b461719
--- /dev/null
+++ b/engine/config/gguf_parser.cc
@@ -0,0 +1,582 @@
+#include <cstdint>
+#include <cstring>
+#include <ctime>
+#include <iostream>
+#include <regex>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#ifdef _WIN32
+#include <io.h>
+#include <windows.h>
+#else
+#include <sys/mman.h>  // For memory-mapped file
+#include <unistd.h>    // For file descriptors
+#endif
+
+#include <fcntl.h>  // For file descriptors
+
+#include <jinja2cpp/template.h>
+
+#include "gguf_parser.h"
+#include "trantor/utils/Logger.h"
+
+namespace config {
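+// The .gguf file is memory-mapped (MapViewOfFile on Windows, mmap elsewhere)
+// so the parser can walk the header and metadata with plain pointer reads.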
+void GGUFHandler::OpenFile(const std::string& file_path) {
+#ifdef _WIN32
+  HANDLE file_handle_ = INVALID_HANDLE_VALUE;
+  HANDLE file_mapping_ = nullptr;
+  file_handle_ =
+      CreateFileA(file_path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr,
+                  OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
+  if (file_handle_ == INVALID_HANDLE_VALUE) {
+    throw std::runtime_error("Failed to open file");
+  }
+
+  // Get the file size
+  LARGE_INTEGER file_size_struct;
+  if (!GetFileSizeEx(file_handle_, &file_size_struct)) {
+    CloseHandle(file_handle_);
+    throw std::runtime_error("Failed to get file size");
+  }
+  file_size_ = static_cast<size_t>(file_size_struct.QuadPart);
+
+  // Create a file mapping object
+  file_mapping_ =
+      CreateFileMappingA(file_handle_, nullptr, PAGE_READONLY, 0, 0, nullptr);
+  if (file_mapping_ == nullptr) {
+    CloseHandle(file_handle_);
+    throw std::runtime_error("Failed to create file mapping");
+  }
+
+  // Map the file into memory
+  data_ = static_cast<uint8_t*>(
+      MapViewOfFile(file_mapping_, FILE_MAP_READ, 0, 0, file_size_));
+  if (data_ == nullptr) {
+    CloseHandle(file_mapping_);
+    CloseHandle(file_handle_);
+    throw std::runtime_error("Failed to map file");
+  }
+
+  // Close the file handle, as it is no longer needed after mapping
+  CloseHandle(file_handle_);
+
+#else
+  FILE* fd = fopen(file_path.c_str(), "rb");
+  if (!fd) {
+    perror("Error opening file");
+    throw std::runtime_error("Failed to open file");
+  }
+
+  // Get file size
+  // file_size_ = lseek(fd, 0, SEEK_END);
+  fseek(fd, 0, SEEK_END);  // move file pointer to end of file
+  file_size_ = ftell(fd);  // get the file size, in bytes
+  fclose(fd);
+  if (file_size_ == -1) {
+    perror("Error getting file size");
+    // close(fd);
+    throw std::runtime_error("Failed to get file size");
+  }
+  int file_descriptor = open(file_path.c_str(), O_RDONLY);
+  ;
+  // Memory-map the file
+  data_ = static_cast<uint8_t*>(
+      mmap(nullptr, file_size_, PROT_READ, MAP_PRIVATE, file_descriptor, 0));
+  if (data_ == MAP_FAILED) {
+    perror("Error mapping file");
+    close(file_descriptor);
+    throw std::runtime_error("Failed to map file");
+  }
+
+  close(file_descriptor);
+
+#endif
+}
+
+void GGUFHandler::CloseFile() {
+#ifdef _WIN32
+  if (data_ != nullptr) {
+    UnmapViewOfFile(data_);
+    data_ = nullptr;
+  }
+#else
+  if (data_ != nullptr && data_ != MAP_FAILED) {
+    munmap(data_, file_size_);
+  }
+#endif
+}
+
+std::pair<std::size_t, std::string> GGUFHandler::ReadString(
+    std::size_t offset) const {
+  uint64_t length;
+  std::memcpy(&length, data_ + offset, sizeof(uint64_t));
+
+  std::string value(reinterpret_cast<const char*>(data_ + offset + 8), length);
+  return {8 + static_cast<std::size_t>(length), value};
+}
+
+size_t GGUFHandler::ReadMetadataValue(int type, std::size_t offset,
+                                      const std::string& key) {
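+  // GGUF metadata value types: 0 uint8, 1 int8, 2 uint16, 3 int16, 4 uint32,
+  // 5 int32, 6 float32, 7 bool, 8 string, 9 array, 10 uint64, 11 int64,
+  // 12 float64. The return value is the number of bytes consumed at `offset`.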
+  switch (type) {
+    case 0:  // UINT8
+      metadata_uint8_[key] = data_[offset];
+      return 1;
+    case 1:  // INT8
+      metadata_int8_[key] = static_cast<int8_t>(data_[offset]);
+      return 1;
+    case 2:  // UINT16
+      metadata_uint16_[key] =
+          *reinterpret_cast<const uint16_t*>(data_ + offset);
+      return 2;
+    case 3:  // INT16
+      metadata_int16_[key] = *reinterpret_cast<const int16_t*>(data_ + offset);
+      return 2;
+    case 4:  // UINT32
+      metadata_uint32_[key] =
+          *reinterpret_cast<const uint32_t*>(data_ + offset);
+      return 4;
+    case 5:  // INT32
+      metadata_int32_[key] = *reinterpret_cast<const int32_t*>(data_ + offset);
+      return 4;
+    case 6:  // FLOAT32
+      metadata_float_[key] = *reinterpret_cast<const float*>(data_ + offset);
+      return 4;
+    case 7:  // BOOL
+      metadata_bool_[key] = data_[offset] != 0;
+      return 1;
+    case 8:  // STRING
+    {
+      auto [byte_length, value] = ReadString(offset);
+      metadata_string_[key] = value;
+      return byte_length;
+    }
+    case 9:  // ARRAY
+
+      return ReadArray(offset, key);
+    case 10:  // UINT64
+      metadata_uint64_[key] =
+          *reinterpret_cast<const uint64_t*>(data_ + offset);
+      return 8;
+    case 11:  // INT64
+      metadata_int64_[key] = *reinterpret_cast<const int64_t*>(data_ + offset);
+      return 8;
+    case 12:  // FLOAT64
+      metadata_double_[key] = *reinterpret_cast<const double*>(data_ + offset);
+      return 8;
+    default:
+      throw std::runtime_error("Unsupported metadata type: " +
+                               std::to_string(type));
+  }
+}
+
+size_t GGUFHandler::ReadArray(std::size_t offset, const std::string& key) {
+  uint32_t array_type = *reinterpret_cast<const uint32_t*>(data_ + offset);
+  // std::memcpy(&array_type, data_ + offset, sizeof(uint32_t));
+
+  uint64_t array_length =
+      *reinterpret_cast<const uint64_t*>(data_ + offset + 4);
+  // std::memcpy(&array_length, data_ + offset + 4, sizeof(uint64_t));
+  LOG_INFO << "\n"
+           << "Parsing array type: " << array_type
+           << ", array length:" << array_length << "\n";
+  std::size_t array_offset = 12;
+  std::vector<std::string> array_values_string;
+  std::vector<float> array_values_float;
+  uint8_t uint8_value;
+  int8_t int8_value;
+  uint16_t uint16_value;
+  int16_t int16_value;
+  uint32_t uint32_value;
+  int32_t int32_value;
+  float float_value;
+  bool bool_value;
+  std::string string_value;
+  uint64_t uint64_value;
+  int64_t int64_value;
+  double double_value;
+  size_t length;
+
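+  // Array layout: a 4-byte element type and an 8-byte element count (hence
+  // the initial array_offset of 12), followed by tightly packed elements;
+  // string elements carry a uint64 length prefix like top-level strings.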
+  for (uint64_t i = 0; i < array_length; ++i) {
+    // auto [byteLength, value] = ReadMetadataValue(array_type, offset + array_offset);
+    // assume that array only has 2 types: string and int
+    switch (array_type) {
+      case 0:
+        uint8_value = data_[offset + array_offset];
+        length = 1;
+        array_values_float.push_back(static_cast<float>(uint8_value));
+        break;
+      case 1: {
+        int8_value = static_cast<int8_t>(data_[offset + array_offset]);
+        length = 1;
+        array_values_float.push_back(static_cast<float>(int8_value));
+      }
+
+      break;
+      case 2:
+        uint16_value =
+            *reinterpret_cast<const uint16_t*>(data_ + offset + array_offset);
+        length = 2;
+        array_values_float.push_back(static_cast<float>(uint16_value));
+        break;
+      case 3:
+        int16_value =
+            *reinterpret_cast<const int16_t*>(data_ + offset + array_offset);
+        length = 2;
+        array_values_float.push_back(static_cast<float>(int16_value));
+        break;
+      case 4:
+        uint32_value =
+            *reinterpret_cast<const uint32_t*>(data_ + offset + array_offset);
+        length = 4;
+        array_values_float.push_back(static_cast<float>(uint32_value));
+        break;
+      case 5:
+        int32_value =
+            *reinterpret_cast<const int32_t*>(data_ + offset + array_offset);
+        length = 4;
+        array_values_float.push_back(static_cast<float>(int32_value));
+        break;
+      case 6:
+        float_value =
+            *reinterpret_cast<const float*>(data_ + offset + array_offset);
+        length = 4;
+        array_values_float.push_back(static_cast<float>(float_value));
+        break;
+      case 7:
+        bool_value = data_[offset + array_offset] != 0;
+        length = 1;
+        array_values_float.push_back(static_cast<float>(bool_value));
+        break;
+      case 8: {
+        uint64_t length_ =
+            *reinterpret_cast<const uint64_t*>(data_ + offset + array_offset);
+        std::string value(
+            reinterpret_cast<const char*>(data_ + offset + array_offset + 8),
+            length_);
+        length = 8 + static_cast<std::size_t>(length_);
+        array_values_string.push_back(value);
+      } break;
+      case 10:
+        uint64_value =
+            *reinterpret_cast<const uint64_t*>(data_ + offset + array_offset);
+        length = 8;
+        array_values_float.push_back(static_cast<float>(uint64_value));
+        break;
+      case 11:
+        int64_value =
+            *reinterpret_cast<const int64_t*>(data_ + offset + array_offset);
+        length = 8;
+        array_values_float.push_back(static_cast<float>(int64_value));
+        break;
+      case 12:
+        double_value =
+            *reinterpret_cast<const double*>(data_ + offset + array_offset);
+        length = 8;
+        array_values_float.push_back(static_cast<float>(double_value));
+        break;
+      default:
+        throw std::runtime_error("Unsupported metadata type: " +
+                                 std::to_string(array_type));
+        break;
+    }
+
+    array_offset += length;
+  }
+  if (array_values_string.size() > 0)
+    metadata_array_string_[key] = array_values_string;
+  else
+    metadata_array_float_[key] = array_values_float;
+  return array_offset;
+}
+
+void GGUFHandler::Parse(const std::string& file_path) {
+  OpenFile(file_path);
+  LOG_INFO << "GGUF magic number: "
+           << *reinterpret_cast<const uint32_t*>(data_) << "\n";
+  if (*reinterpret_cast<const uint32_t*>(data_) != GGUF_MAGIC_NUMBER) {
+    throw std::runtime_error("Not a valid GGUF file");
+  }
+
+  version_ = *reinterpret_cast<const uint32_t*>(data_ + 4);
+  tensor_count_ = *reinterpret_cast<const uint64_t*>(data_ + 8);
+  uint64_t metadata_kv_count = *reinterpret_cast<const uint64_t*>(data_ + 16);
+  LOG_INFO << "version: " << version_ << "\ntensor count: " << tensor_count_
+           << "\nmetadata key-value pairs: " << metadata_kv_count << "\n";
+
+  std::size_t offset = 24;
+
+  for (uint64_t i = 0; i < metadata_kv_count; ++i) {
+    LOG_INFO << "Parsing key-value number " << i << "\n";
+    auto [key_byte_length, key] = ReadString(offset);
+    offset += key_byte_length;
+    LOG_INFO << "key: " << key << "\n";
+    uint32_t value_type = *reinterpret_cast<const uint32_t*>(data_ + offset);
+    offset += 4;
+    LOG_INFO << "value type number: " << value_type << "\n";
+    size_t value_byte_length = ReadMetadataValue(value_type, offset, key);
+    offset += value_byte_length;
+    LOG_INFO << "-------------------------------------------- " << "\n";
+  }
+  PrintMetadata();
+  ModelConfigFromMetadata();
+  CloseFile();
+}
+
+void GGUFHandler::PrintMetadata() {
+  LOG_INFO << "GGUF Metadata:" << "\n";
+  for (const auto& [key, value] : metadata_uint8_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_int8_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_uint16_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_int16_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_uint32_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_int32_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_float_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_bool_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_string_) {
+
+    if (key.compare("tokenizer.chat_template") == 0) {
+      LOG_INFO << key << ": " << "\n" << value << "\n";
+
+      jinja2::Template chat_template;
+      chat_template.Load(value);
+      jinja2::ValuesMap params{
+          {"add_generation_prompt", true},
+          {"bos_token", "<|begin_of_text|>"},
+          {"eos_token", "<|eot_id|>"},
+          {"messages",
+           jinja2::ValuesList{
+               jinja2::ValuesMap{{"role", "system"},
+                                 {"content", "{system_message}"}},
+               jinja2::ValuesMap{{"role", "user"}, {"content", "{prompt}"}}}}};
+      std::string result = chat_template.RenderAsString(params).value();
+
+      LOG_INFO << "result jinja render: " << result << "\n";
+    } else {
+      LOG_INFO << key << ": " << value << "\n";
+    }
+  }
+
+  for (const auto& [key, value] : metadata_uint64_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_int64_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_double_)
+    LOG_INFO << key << ": " << value << "\n";
+
+  for (const auto& [key, value] : metadata_array_float_)
+    LOG_INFO << key << " num elements: " << value.size() << "\n";
+
+  for (const auto& [key, value] : metadata_array_string_)
+    LOG_INFO << key << " num elements: " << value.size() << "\n";
+}
+
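+// Derives a ModelConfig from the parsed metadata. Every integer-width map is
+// scanned for the same keys (general.quantization_version, bos/eos token ids,
+// context_length, block_count) because GGUF producers differ in which integer
+// type they emit; the chat template is then matched against known templates
+// or rendered with jinja2cpp as a fallback.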
+void GGUFHandler::ModelConfigFromMetadata() {
+  int eos_token, bos_token, max_tokens, version, ngl;
+  std::string chat_template, name, eos_string, bos_string;
+  std::vector<std::string> tokens, stop;
+  model_config_.top_p = 0.95;
+  model_config_.temperature = 0.7;
+  model_config_.frequency_penalty = 0;
+  model_config_.presence_penalty = 0;
+  model_config_.stream = true;
+  model_config_.engine = "cortex.llamacpp";
+  model_config_.created = std::time(nullptr);
+  model_config_.model = "model";
+  model_config_.owned_by = "";
+  model_config_.version;
+
+  // Get version, bos, eos id, context_len, ngl from metadata
+  for (const auto& [key, value] : metadata_uint8_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+
+  for (const auto& [key, value] : metadata_int8_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+
+  for (const auto& [key, value] : metadata_uint16_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+
+  for (const auto& [key, value] : metadata_int16_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+
+  for (const auto& [key, value] : metadata_uint32_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+
+  for (const auto& [key, value] : metadata_int32_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+  for (const auto& [key, value] : metadata_uint64_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
+
+  for (const auto& [key, value] : metadata_int64_) {
+    if (key.compare("general.quantization_version") == 0)
+      version = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.bos_token_id") == 0)
+      bos_token = static_cast<int>(value);
+    else if (key.compare("tokenizer.ggml.eos_token_id") == 0)
+      eos_token = static_cast<int>(value);
+    else if (key.find("context_length") != std::string::npos)
+      max_tokens = static_cast<int>(value);
+    else if (key.find("block_count") != std::string::npos)
+      ngl = static_cast<int>(value) + 1;
+  }
(key.find("block_count") != std::string::npos) + ngl = static_cast(value) + 1; + } + + for (const auto& [key, value] : metadata_int32_) { + if (key.compare("general.quantization_version") == 0) + version = static_cast(value); + else if (key.compare("tokenizer.ggml.bos_token_id") == 0) + bos_token = static_cast(value); + else if (key.compare("tokenizer.ggml.eos_token_id") == 0) + eos_token = static_cast(value); + else if (key.find("context_length") != std::string::npos) + max_tokens = static_cast(value); + else if (key.find("block_count") != std::string::npos) + ngl = static_cast(value) + 1; + } + for (const auto& [key, value] : metadata_uint64_) { + if (key.compare("general.quantization_version") == 0) + version = static_cast(value); + else if (key.compare("tokenizer.ggml.bos_token_id") == 0) + bos_token = static_cast(value); + else if (key.compare("tokenizer.ggml.eos_token_id") == 0) + eos_token = static_cast(value); + else if (key.find("context_length") != std::string::npos) + max_tokens = static_cast(value); + else if (key.find("block_count") != std::string::npos) + ngl = static_cast(value) + 1; + } + + for (const auto& [key, value] : metadata_int64_) { + if (key.compare("general.quantization_version") == 0) + version = static_cast(value); + else if (key.compare("tokenizer.ggml.bos_token_id") == 0) + bos_token = static_cast(value); + else if (key.compare("tokenizer.ggml.eos_token_id") == 0) + eos_token = static_cast(value); + else if (key.find("context_length") != std::string::npos) + max_tokens = static_cast(value); + else if (key.find("block_count") != std::string::npos) + ngl = static_cast(value) + 1; + } + for (const auto& [key, value] : metadata_array_string_) { + if (key.compare("tokenizer.ggml.tokens") == 0) { + tokens = std::move(value); + } + } + for (const auto& [key, value] : metadata_string_) { + if (key.compare("general.name") == 0) { + name = std::regex_replace(value, std::regex(" "), "-"); + } else if (key.compare("tokenizer.chat_template") == 0) { + if (value.compare(ZEPHYR_JINJA) == 0) { + chat_template = + "<|system|>\n{system_message}\n<|user|>\n{prompt}\n<|assistant|>\n"; + } else if (value.compare(OPEN_CHAT_3_5_JINJA) == 0) { + chat_template = + "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"; + } else if (value.compare(LLAMA_3_JINJA) == 0 || + value.compare(LLAMA_3_1_JINJA) == 0) { + chat_template = + "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{" + "system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>" + "\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|" + ">\n\n"; + } else { + try { + jinja2::Template jinja2_chat_template; + jinja2_chat_template.Load(value); + jinja2::ValuesMap params{ + {"add_generation_prompt", true}, + {"bos_token", tokens[bos_token]}, + {"eos_token", tokens[eos_token]}, + {"messages", + jinja2::ValuesList{ + jinja2::ValuesMap{{"role", "system"}, + {"content", "{system_message}"}}, + jinja2::ValuesMap{{"role", "user"}, + {"content", "{prompt}"}}}}}; + chat_template = jinja2_chat_template.RenderAsString(params).value(); + } catch (const std::exception& e) { + std::cerr << "Error render chat template: " << e.what() + << ". 
+#pragma once
+#include <unordered_map>
+#include "yaml_config.h"
+
+namespace config {
+constexpr char OPEN_CHAT_3_5_JINJA[] =
+    "{{ bos_token }}{\% for message in messages \%}{{ 'GPT4 Correct ' + "
+    "message['role'].title() + ': ' + message['content'] + "
+    "'<|end_of_turn|>'}}{\% endfor \%}{\% if add_generation_prompt \%}{{ 'GPT4 "
+    "Correct Assistant:' }}{\% endif \%}";
+constexpr char ZEPHYR_JINJA[] =
+    "{\% for message in messages \%}\n{\% if message['role'] == 'user' \%}\n{{ "
+    "'<|user|>\n' + message['content'] + eos_token }}\n{\% elif "
+    "message['role'] == 'system' \%}\n{{ '<|system|>\n' + message['content'] + "
+    "eos_token }}\n{\% elif message['role'] == 'assistant' \%}\n{{ "
+    "'<|assistant|>\n' + message['content'] + eos_token }}\n{\% endif "
+    "\%}\n{\% if loop.last and add_generation_prompt \%}\n{{ '<|assistant|>' "
+    "}}\n{\% endif \%}\n{\% endfor \%}";
+constexpr char LLAMA_3_1_JINJA[] =
+    "{\% set loop_messages = messages \%}{\% for message in loop_messages "
+    "\%}{\% set content = '<|start_header_id|>' + message['role'] + "
+    "'<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' \%}{\% "
+    "if loop.index0 == 0 \%}{\% set content = bos_token + content \%}{\% endif "
+    "\%}{{ content }}{\% endfor \%}{{ "
+    "'<|start_header_id|>assistant<|end_header_id|>\n\n' }}";
+constexpr char LLAMA_3_JINJA[] =
+    "{\% set loop_messages = messages \%}{\% for message in loop_messages "
+    "\%}{\% set content = '<|start_header_id|>' + message['role'] + "
+    "'<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' \%}{\% "
+    "if loop.index0 == 0 \%}{\% set content = bos_token + content \%}{\% endif "
+    "\%}{{ content }}{\% endfor \%}{\% if add_generation_prompt \%}{{ "
+    "'<|start_header_id|>assistant<|end_header_id|>\n\n' }}";
+constexpr uint32_t GGUF_MAGIC_NUMBER = 1179993927;
+
+class GGUFHandler {
+ public:
+  void CloseFile();
+  void Parse(const std::string& file_path);
+  const ModelConfig& GetModelConfig() const;
+  void PrintMetadata();
+
+ private:
+  std::pair<std::size_t, std::string> ReadString(std::size_t offset) const;
+  size_t ReadMetadataValue(int type, std::size_t offset,
+                           const std::string& key);
+  size_t ReadArray(std::size_t offset, const std::string& key);
+  void ModelConfigFromMetadata();
+  void OpenFile(const std::string& file_path);
+
+  uint8_t* data_;
+  size_t file_size_;
+  uint32_t version_;
+  uint64_t tensor_count_;
+  ModelConfig model_config_;
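+  // One key-value store per GGUF scalar type; arrays are reduced to either
+  // a float vector (all numeric/bool element types) or a string vector.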
+  std::unordered_map<std::string, uint8_t> metadata_uint8_;
+  std::unordered_map<std::string, int8_t> metadata_int8_;
+  std::unordered_map<std::string, uint16_t> metadata_uint16_;
+  std::unordered_map<std::string, int16_t> metadata_int16_;
+  std::unordered_map<std::string, uint32_t> metadata_uint32_;
+  std::unordered_map<std::string, int32_t> metadata_int32_;
+  std::unordered_map<std::string, float> metadata_float_;
+  std::unordered_map<std::string, bool> metadata_bool_;
+  std::unordered_map<std::string, std::string> metadata_string_;
+  std::unordered_map<std::string, uint64_t> metadata_uint64_;
+  std::unordered_map<std::string, int64_t> metadata_int64_;
+  std::unordered_map<std::string, double> metadata_double_;
+  std::unordered_map<std::string, std::vector<float>> metadata_array_float_;
+  std::unordered_map<std::string, std::vector<std::string>>
+      metadata_array_string_;
+};
+}
\ No newline at end of file
diff --git a/engine/config/model_config.h b/engine/config/model_config.h
new file mode 100644
index 000000000..b7cd15810
--- /dev/null
+++ b/engine/config/model_config.h
@@ -0,0 +1,40 @@
+#pragma once
+#include <cstddef>
+#include <limits>
+#include <string>
+#include <vector>
+
+namespace config {
+struct ModelConfig {
+  std::string name;
+  std::string model;
+  std::string version;
+  std::vector<std::string> stop = {};
+  float top_p = std::numeric_limits<float>::quiet_NaN();
+  float temperature = std::numeric_limits<float>::quiet_NaN();
+  float frequency_penalty = std::numeric_limits<float>::quiet_NaN();
+  float presence_penalty = std::numeric_limits<float>::quiet_NaN();
+  int max_tokens = std::numeric_limits<int>::quiet_NaN();
+  bool stream = std::numeric_limits<bool>::quiet_NaN();
+  int ngl = std::numeric_limits<int>::quiet_NaN();
+  int ctx_len = std::numeric_limits<int>::quiet_NaN();
+  std::string engine;
+  std::string prompt_template;
+  std::string system_template;
+  std::string user_template;
+  std::string ai_template;
+
+  std::string os;
+  std::string gpu_arch;
+  std::string quantization_method;
+  std::string precision;
+  int tp = std::numeric_limits<int>::quiet_NaN();
+  std::string trtllm_version;
+  bool text_model = std::numeric_limits<bool>::quiet_NaN();
+  std::string id;
+  std::vector<std::string> files;
+  std::size_t created;
+  std::string object;
+  std::string owned_by = "";
+};
+} // namespace config
\ No newline at end of file
diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc
new file mode 100644
index 000000000..fe3e57370
--- /dev/null
+++ b/engine/config/yaml_config.cc
@@ -0,0 +1,212 @@
+#include <algorithm>
+#include <cmath>
+#include <ctime>
+#include <fstream>
+#include <iostream>
+#include <string>
+using namespace std;
+
+#include "yaml-cpp/yaml.h"
+#include "yaml_config.h"
+
+namespace config {
+// Method to read YAML file
+void YamlHandler::Reset() {
+  model_config_ = ModelConfig();
+  yaml_node_.reset();
+};
+void YamlHandler::ReadYamlFile(const std::string& file_path) {
+  try {
+    yaml_node_ = YAML::LoadFile(file_path);
+    // in case of model.yml file, we don't have files yet, create them
+    if (!yaml_node_["files"]) {
+      auto s = file_path;
+      // normalize path
+      std::replace(s.begin(), s.end(), '\\', '/');
+      std::vector<std::string> v;
+      if (yaml_node_["engine"] &&
+          yaml_node_["engine"].as<std::string>() == "cortex.llamacpp") {
+        v.emplace_back(s.substr(0, s.find_last_of('/')) + "/model.gguf");
+      } else {
+        v.emplace_back(s.substr(0, s.find_last_of('/')));
+      }
+
+      // TODO(any) need to support multiple gguf files
+      yaml_node_["files"] = v;
+    }
+  } catch (const YAML::BadFile& e) {
+    std::cerr << "Failed to read file: " << e.what() << std::endl;
+    throw;
+  }
+  ModelConfigFromYaml();
+}
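+// Splits prompt_template into the three engine fields: system_template is
+// everything before the first '{', user_template sits between the first '}'
+// and the last '{', and ai_template is everything after the last '}'; the
+// {system_message} and {prompt} placeholders themselves are dropped.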
+void YamlHandler::SplitPromptTemplate(ModelConfig& mc) {
+  if (mc.prompt_template.size() > 0) {
+    auto& pt = mc.prompt_template;
+    mc.system_template = pt.substr(0, pt.find_first_of('{'));
+    // std::cout << "System template: " << mc.system_template << std::endl;
+    mc.user_template =
+        pt.substr(pt.find_first_of('}') + 1,
+                  pt.find_last_of('{') - pt.find_first_of('}') - 1);
+    // std::cout << "User template : " << mc.user_template << std::endl;
+    mc.ai_template = pt.substr(pt.find_last_of('}') + 1);
+    // std::cout << "Assistant template: " << mc.ai_template << std::endl;
+  }
+}
+const ModelConfig& YamlHandler::GetModelConfig() const {
+  return model_config_;
+}
+
+void YamlHandler::ModelConfigFromFile(const std::string& file_path) {
+  ReadYamlFile(file_path);
+  ModelConfigFromYaml();
+}
+
+void YamlHandler::ModelConfigFromYaml() {
+  ModelConfig tmp;
+  try {
+    if (yaml_node_["name"])
+      tmp.name = yaml_node_["name"].as<std::string>();
+    if (yaml_node_["model"])
+      tmp.model = yaml_node_["model"].as<std::string>();
+    if (yaml_node_["version"])
+      tmp.version = yaml_node_["version"].as<std::string>();
+    if (yaml_node_["engine"])
+      tmp.engine = yaml_node_["engine"].as<std::string>();
+    if (yaml_node_["prompt_template"]) {
+      tmp.prompt_template = yaml_node_["prompt_template"].as<std::string>();
+      SplitPromptTemplate(tmp);
+    }
+
+    if (yaml_node_["os"])
+      tmp.os = yaml_node_["os"].as<std::string>();
+    if (yaml_node_["gpu_arch"])
+      tmp.gpu_arch = yaml_node_["gpu_arch"].as<std::string>();
+    if (yaml_node_["quantization_method"])
+      tmp.quantization_method =
+          yaml_node_["quantization_method"].as<std::string>();
+    if (yaml_node_["precision"])
+      tmp.precision = yaml_node_["precision"].as<std::string>();
+    if (yaml_node_["trtllm_version"])
+      tmp.trtllm_version = yaml_node_["trtllm_version"].as<std::string>();
+    if (yaml_node_["id"])
+      tmp.id = yaml_node_["id"].as<std::string>();
+    if (yaml_node_["object"])
+      tmp.object = yaml_node_["object"].as<std::string>();
+    if (yaml_node_["owned_by"])
+      tmp.owned_by = yaml_node_["owned_by"].as<std::string>();
+    if (yaml_node_["top_p"])
+      tmp.top_p = yaml_node_["top_p"].as<float>();
+    if (yaml_node_["temperature"])
+      tmp.temperature = yaml_node_["temperature"].as<float>();
+    if (yaml_node_["frequency_penalty"])
+      tmp.frequency_penalty = yaml_node_["frequency_penalty"].as<float>();
+    if (yaml_node_["presence_penalty"])
+      tmp.presence_penalty = yaml_node_["presence_penalty"].as<float>();
+    if (yaml_node_["max_tokens"])
+      tmp.max_tokens = yaml_node_["max_tokens"].as<int>();
+    if (yaml_node_["ngl"])
+      tmp.ngl = yaml_node_["ngl"].as<int>();
+    if (yaml_node_["ctx_len"])
+      tmp.ctx_len = yaml_node_["ctx_len"].as<int>();
+    if (yaml_node_["tp"])
+      tmp.tp = yaml_node_["tp"].as<int>();
+    if (yaml_node_["stream"])
+      tmp.stream = yaml_node_["stream"].as<bool>();
+    if (yaml_node_["text_model"])
+      tmp.text_model = yaml_node_["text_model"].as<bool>();
+    if (yaml_node_["stop"])
+      tmp.stop = yaml_node_["stop"].as<std::vector<std::string>>();
+    if (yaml_node_["files"])
+      tmp.files = yaml_node_["files"].as<std::vector<std::string>>();
+    if (yaml_node_["created"])
+      tmp.created = yaml_node_["created"].as<std::size_t>();
+  } catch (const std::exception& e) {
+    std::cerr << "Error when load model config : " << e.what() << std::endl;
+    std::cerr << "Revert ..." << std::endl;
+    return;
+  }
+  model_config_ = std::move(tmp);
+}
+
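+// Rebuilds yaml_node_ from new_model_config; if serialization throws, the
+// previous model_config_ is restored so the handler stays consistent.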
+
+void YamlHandler::UpdateModelConfig(ModelConfig new_model_config) {
+  ModelConfig tmp = std::move(model_config_);
+  try {
+    model_config_ = std::move(new_model_config);
+    yaml_node_.reset();
+    if (!model_config_.name.empty())
+      yaml_node_["name"] = model_config_.name;
+    if (!model_config_.model.empty())
+      yaml_node_["model"] = model_config_.model;
+    if (!model_config_.version.empty())
+      yaml_node_["version"] = model_config_.version;
+    if (!model_config_.engine.empty())
+      yaml_node_["engine"] = model_config_.engine;
+    if (!model_config_.prompt_template.empty()) {
+      yaml_node_["prompt_template"] = model_config_.prompt_template;
+      SplitPromptTemplate(model_config_);
+    }
+
+    if (!model_config_.os.empty())
+      yaml_node_["os"] = model_config_.os;
+    if (!model_config_.gpu_arch.empty())
+      yaml_node_["gpu_arch"] = model_config_.gpu_arch;
+    if (!model_config_.quantization_method.empty())
+      yaml_node_["quantization_method"] = model_config_.quantization_method;
+    if (!model_config_.precision.empty())
+      yaml_node_["precision"] = model_config_.precision;
+    if (!model_config_.trtllm_version.empty())
+      yaml_node_["trtllm_version"] = model_config_.trtllm_version;
+    if (!model_config_.id.empty())
+      yaml_node_["id"] = model_config_.id;
+    if (!model_config_.object.empty())
+      yaml_node_["object"] = model_config_.object;
+    if (!model_config_.owned_by.empty())
+      yaml_node_["owned_by"] = model_config_.owned_by;
+    if (!std::isnan(model_config_.top_p))
+      yaml_node_["top_p"] = model_config_.top_p;
+    if (!std::isnan(model_config_.temperature))
+      yaml_node_["temperature"] = model_config_.temperature;
+    if (!std::isnan(model_config_.frequency_penalty))
+      yaml_node_["frequency_penalty"] = model_config_.frequency_penalty;
+    if (!std::isnan(model_config_.presence_penalty))
+      yaml_node_["presence_penalty"] = model_config_.presence_penalty;
+    if (!std::isnan(static_cast<double>(model_config_.max_tokens)))
+      yaml_node_["max_tokens"] = model_config_.max_tokens;
+    if (!std::isnan(static_cast<double>(model_config_.ngl)))
+      yaml_node_["ngl"] = model_config_.ngl;
+    if (!std::isnan(static_cast<double>(model_config_.ctx_len)))
+      yaml_node_["ctx_len"] = model_config_.ctx_len;
+    if (!std::isnan(static_cast<double>(model_config_.tp)))
+      yaml_node_["tp"] = model_config_.tp;
+    if (!std::isnan(static_cast<double>(model_config_.stream)))
+      yaml_node_["stream"] = model_config_.stream;
+    if (!std::isnan(static_cast<double>(model_config_.text_model)))
+      yaml_node_["text_model"] = model_config_.text_model;
+    if (model_config_.stop.size() > 0)
+      yaml_node_["stop"] = model_config_.stop;
+    if (model_config_.files.size() > 0)
+      yaml_node_["files"] = model_config_.files;
+    yaml_node_["created"] = std::time(nullptr);
+  } catch (const std::exception& e) {
+    std::cerr << "Error when updating model config: " << e.what() << std::endl;
+    std::cerr << "Reverting..." << std::endl;
+    model_config_ = std::move(tmp);
+  }
+}
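One caveat about the NaN defaults that UpdateModelConfig guards against: std::numeric_limits<T>::quiet_NaN() only produces a real NaN for floating-point T; for integral and bool T it returns T(), i.e. 0/false. So the std::isnan checks above work for top_p/temperature/etc. but can never distinguish "unset" from a legitimate 0 for the int fields. A minimal sketch of the pitfall, using only the definitions from model_config.h:

   #include <cmath>
   #include <limits>

   int main() {
     int max_tokens = std::numeric_limits<int>::quiet_NaN();  // value is 0, not NaN
     float top_p = std::numeric_limits<float>::quiet_NaN();   // a real NaN
     bool int_unset = std::isnan(static_cast<double>(max_tokens));  // always false
     bool flt_unset = std::isnan(top_p);                            // true
   }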
+
+// Method to write all attributes to a YAML file
+void YamlHandler::WriteYamlFile(const std::string& file_path) const {
+  try {
+    std::ofstream outFile(file_path);
+    if (!outFile) {
+      throw std::runtime_error("Failed to open output file.");
+    }
+    outFile << yaml_node_;
+    outFile.close();
+  } catch (const std::exception& e) {
+    std::cerr << "Error writing to file: " << e.what() << std::endl;
+    throw;
+  }
+}
+} // namespace config
\ No newline at end of file
diff --git a/engine/config/yaml_config.h b/engine/config/yaml_config.h
new file mode 100644
index 000000000..3f8af5400
--- /dev/null
+++ b/engine/config/yaml_config.h
@@ -0,0 +1,32 @@
+#pragma once
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "yaml-cpp/yaml.h"
+#include "model_config.h"
+namespace config {
+class YamlHandler {
+ private:
+  YAML::Node yaml_node_;
+  ModelConfig model_config_;
+  void ReadYamlFile(const std::string& file_path);
+  void ModelConfigFromYaml();
+  void SplitPromptTemplate(ModelConfig& mc);
+
+ public:
+  // Method to read YAML file
+  void Reset();
+
+  const ModelConfig& GetModelConfig() const;
+
+  void ModelConfigFromFile(const std::string& file_path);
+
+  void UpdateModelConfig(ModelConfig new_model_config);
+  // Method to write all attributes to a YAML file
+  void WriteYamlFile(const std::string& file_path) const;
+};
+}
\ No newline at end of file
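Taken together, the handler is meant to be driven in a read-modify-write cycle. A minimal caller-side sketch (the model.yml path is hypothetical):

   config::YamlHandler handler;
   handler.ModelConfigFromFile("./models/tinyllama/model.yml");  // hypothetical path
   config::ModelConfig mc = handler.GetModelConfig();
   mc.ctx_len = 4096;              // tweak a field
   handler.UpdateModelConfig(mc);  // rebuilds yaml_node_ from the struct
   handler.WriteYamlFile("./models/tinyllama/model.yml");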
diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc
new file mode 100644
index 000000000..38c071546
--- /dev/null
+++ b/engine/controllers/command_line_parser.cc
@@ -0,0 +1,93 @@
+#include "command_line_parser.h"
+#include "commands/model_pull_cmd.h"
+#include "commands/start_model_cmd.h"
+#include "commands/stop_model_cmd.h"
+#include "commands/stop_server_cmd.h"
+#include "config/yaml_config.h"
+#include "utils/cortex_utils.h"
+
+CommandLineParser::CommandLineParser() : app_("Cortex.cpp CLI") {}
+
+bool CommandLineParser::SetupCommand(int argc, char** argv) {
+  // Models group commands
+  {
+    auto models_cmd =
+        app_.add_subcommand("models", "Subcommands for managing models");
+
+    auto start_cmd = models_cmd->add_subcommand("start", "Start a model by ID");
+    std::string model_id;
+    start_cmd->add_option("model_id", model_id, "");
+    start_cmd->callback([&model_id]() {
+      // TODO(sang) switch to .yaml when implement model manager
+      config::YamlHandler yaml_handler;
+      yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() +
+                                       "/models/" + model_id + "/model.yml");
+      commands::StartModelCmd smc("127.0.0.1", 3928,
+                                  yaml_handler.GetModelConfig());
+      smc.Exec();
+    });
+
+    auto stop_model_cmd =
+        models_cmd->add_subcommand("stop", "Stop a model by ID");
+    stop_model_cmd->add_option("model_id", model_id, "");
+    stop_model_cmd->callback([&model_id]() {
+      // TODO(sang) switch to .yaml when implement model manager
+      config::YamlHandler yaml_handler;
+      yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() +
+                                       "/models/" + model_id + "/model.yml");
+      commands::StopModelCmd smc("127.0.0.1", 3928,
+                                 yaml_handler.GetModelConfig());
+      smc.Exec();
+    });
+
+    auto list_models_cmd =
+        models_cmd->add_subcommand("list", "List all models locally");
+
+    //// Models group commands
+    auto model_pull_cmd =
+        app_.add_subcommand("pull",
+                            "Download a model from a registry. Working with "
+                            "HuggingFace repositories. For available models, "
+                            "please visit https://huggingface.co/cortexso");
+    model_pull_cmd->add_option("model_id", model_id, "");
+    model_pull_cmd->callback([&model_id]() {
+      commands::ModelPullCmd command(model_id);
+      command.Exec();
+    });
+
+    auto remove_cmd =
+        models_cmd->add_subcommand("remove", "Remove a model by ID locally");
+    auto update_cmd =
+        models_cmd->add_subcommand("update", "Update configuration of a model");
+  }
+  //// End of Models group commands
+
+  auto chat_cmd = app_.add_subcommand("chat", "Send a chat request to a model");
+
+  auto ps_cmd =
+      app_.add_subcommand("ps", "Show running models and their status");
+
+  auto embeddings_cmd = app_.add_subcommand(
+      "embeddings", "Creates an embedding vector representing the input text");
+
+  // engines group commands
+  auto engines_cmd = app_.add_subcommand("engines", "Get cortex engines");
+  auto list_engines_cmd =
+      engines_cmd->add_subcommand("list", "List all cortex engines");
+  auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine");
+  auto init_cmd = engines_cmd->add_subcommand("init", "Setup engine");
+
+  auto run_cmd =
+      app_.add_subcommand("run", "Shortcut to start a model and chat");
+
+  auto stop_cmd = app_.add_subcommand("stop", "Stop the API server");
+
+  stop_cmd->callback([] {
+    // TODO get info from config file
+    commands::StopServerCmd ssc("127.0.0.1", 3928);
+    ssc.Exec();
+  });
+
+  CLI11_PARSE(app_, argc, argv);
+  return true;
+}
\ No newline at end of file
diff --git a/engine/controllers/command_line_parser.h b/engine/controllers/command_line_parser.h
new file mode 100644
index 000000000..3324d45e0
--- /dev/null
+++ b/engine/controllers/command_line_parser.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <string>
+#include "CLI/CLI.hpp"
+
+class CommandLineParser {
+ public:
+  CommandLineParser();
+  bool SetupCommand(int argc, char** argv);
+
+ private:
+  CLI::App app_;
+};
\ No newline at end of file
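Once main.cc routes argv into this parser (PATCH 08 later in this series), the CLI11 tree above is exercised as plain subcommands. A sketch, assuming the built binary is named cortex and a model id of tinyllama (both hypothetical):

   cortex pull tinyllama            # top-level "pull", downloads from cortexso
   cortex models start tinyllama    # reads ./models/tinyllama/model.yml
   cortex models stop tinyllama
   cortex models list               # gets its callback in PATCH 07 below
   cortex stop                      # stops the API server at 127.0.0.1:3928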
diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc
new file mode 100644
index 000000000..c5c970412
--- /dev/null
+++ b/engine/services/download_service.cc
@@ -0,0 +1,111 @@
+#include <filesystem>
+#include <fstream>
+#include <sstream>
+#include <thread>
+#include <trantor/utils/Logger.h>
+
+#include "download_service.h"
+
+void DownloadService::AddDownloadTask(const DownloadTask& task) {
+  tasks.push_back(task);
+
+  for (const auto& item : task.items) {
+    StartDownloadItem(task.id, item);
+  }
+}
+
+void DownloadService::AddAsyncDownloadTask(const DownloadTask& task) {
+  tasks.push_back(task);
+  for (const auto& item : task.items) {
+    // TODO: maybe apply std::async is better?
+    std::thread([this, task, item]() {
+      this->StartDownloadItem(task.id, item);
+    }).detach();
+  }
+}
+
+const std::string DownloadService::GetContainerFolderPath(DownloadType type) {
+  std::filesystem::path container_folder_path;
+
+  switch (type) {
+    case DownloadType::Model: {
+      container_folder_path = std::filesystem::current_path() / "models";
+      break;
+    }
+    case DownloadType::Engine: {
+      container_folder_path = std::filesystem::current_path() / "engines";
+      break;
+    }
+    default: {
+      container_folder_path = std::filesystem::current_path() / "misc";
+      break;
+    }
+  }
+
+  if (!std::filesystem::exists(container_folder_path)) {
+    LOG_INFO << "Creating folder: " << container_folder_path.string() << "\n";
+    std::filesystem::create_directory(container_folder_path);
+  }
+
+  return container_folder_path.string();
+}
+
+void DownloadService::StartDownloadItem(const std::string& downloadId,
+                                        const DownloadItem& item,
+                                        const DownloadItemCb& callback) {
+  LOG_INFO << "Downloading item: " << downloadId;
+  const std::string containerFolderPath = GetContainerFolderPath(item.type);
+  LOG_INFO << "Container folder path: " << containerFolderPath << "\n";
+  const std::filesystem::path itemFolderPath =
+      std::filesystem::path(containerFolderPath) /
+      std::filesystem::path(downloadId);
+  if (!std::filesystem::exists(itemFolderPath)) {
+    LOG_INFO << "Creating " << itemFolderPath.string();
+    std::filesystem::create_directory(itemFolderPath);
+  }
+
+  LOG_INFO << "itemFolderPath: " << itemFolderPath.string();
+  auto outputFilePath = itemFolderPath / std::filesystem::path(item.fileName);
+  LOG_INFO << "Absolute file output: " << outputFilePath.string();
+
+  uint64_t last = 0;
+  uint64_t tot = 0;
+  std::ofstream outputFile(outputFilePath, std::ios::binary);
+
+  std::ostringstream downloadUrl;
+  downloadUrl << item.host << "/" << item.path;
+  LOG_INFO << "Downloading url: " << downloadUrl.str();
+
+  httplib::Client client(item.host);
+
+  client.set_follow_location(true);
+  client.Get(
+      downloadUrl.str(),
+      [](const httplib::Response& res) {
+        if (res.status != httplib::StatusCode::OK_200) {
+          LOG_ERROR << "HTTP error: " << res.reason;
+          return false;
+        }
+        return true;
+      },
+      [&](const char* data, size_t data_length) {
+        tot += data_length;
+        outputFile.write(data, data_length);
+        return true;
+      },
+      [&last, this](uint64_t current, uint64_t total) {
+        if (current - last > kUpdateProgressThreshold) {
+          last = current;
+          LOG_INFO << "Downloading: " << current << " / " << total;
+        }
+        if (current == total) {
+          LOG_INFO << "Done download: "
+                   << static_cast<double>(total) / 1024 / 1024 << " MiB";
+          return false;
+        }
+        return true;
+      });
+  if (callback) {
+    callback(outputFilePath.string());
+  }
+}
\ No newline at end of file
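Caller-side, the service is fed a DownloadTask built from the structs declared in the header that follows. A minimal sketch (ids, host, and paths are made up for illustration):

   DownloadItem item;
   item.id = "model.gguf";                                   // hypothetical
   item.host = "https://huggingface.co";
   item.fileName = "model.gguf";
   item.type = DownloadType::Model;
   item.path = "cortexso/tinyllama/resolve/main/model.gguf"; // hypothetical
   item.status = DownloadStatus::Pending;

   DownloadTask task;
   task.id = "tinyllama";
   task.type = DownloadType::Model;
   task.items = {item};

   DownloadService service;
   service.AddDownloadTask(task);   // blocks; AddAsyncDownloadTask detaches threads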
diff --git a/engine/services/download_service.h b/engine/services/download_service.h
new file mode 100644
index 000000000..e7fc14b6d
--- /dev/null
+++ b/engine/services/download_service.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include <optional>
+#include <string>
+#include "httplib.h"
+
+enum class DownloadType { Model, Engine, Miscellaneous };
+
+enum class DownloadStatus {
+  Pending,
+  Downloading,
+  Error,
+  Downloaded,
+};
+
+struct DownloadItem {
+  std::string id;
+
+  std::string host;
+
+  std::string fileName;
+
+  DownloadType type;
+
+  std::string path;
+
+  uint64_t totalSize;
+
+  uint64_t transferredSize;
+
+  DownloadStatus status;
+
+  std::optional<std::string> checksum;
+};
+
+struct DownloadTask {
+  std::string id;
+  DownloadType type;
+  float percentage;
+  DownloadStatus status;
+  std::optional<std::string> error;
+  std::vector<DownloadItem> items;
+};
+
+class DownloadService {
+ public:
+  /**
+   * @brief Synchronously download.
+   *
+   * @param task
+   */
+  using DownloadItemCb = std::function<void(const std::string&)>;
+  void AddDownloadTask(const DownloadTask& task);
+
+  void AddAsyncDownloadTask(const DownloadTask& task);
+
+  // TODO: [NamH] implement the following methods
+  //  void removeTask(const std::string &id);
+  //  void registerCallback
+  //  setup folder path at runtime
+  //  register action after downloaded
+
+ private:
+  void StartDownloadItem(const std::string& downloadId,
+                         const DownloadItem& item,
+                         const DownloadItemCb& callback = nullptr);
+
+  const std::string GetContainerFolderPath(DownloadType type);
+
+  // store tasks so we can abort it later
+  std::vector<DownloadTask> tasks;
+  const int kUpdateProgressThreshold = 100000000;
+};
\ No newline at end of file
diff --git a/engine/utils/archive_utils.h b/engine/utils/archive_utils.h
new file mode 100644
index 000000000..6b2f5767d
--- /dev/null
+++ b/engine/utils/archive_utils.h
@@ -0,0 +1,146 @@
+#include <archive.h>
+#include <archive_entry.h>
+#include <minizip/unzip.h>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <trantor/utils/Logger.h>
+
+namespace archive_utils {
+inline bool UnzipFile(const std::string& input_zip_path,
+                      const std::string& destination_path);
+inline bool UntarFile(const std::string& input_tar_path,
+                      const std::string& destination_path);
+
+inline bool ExtractArchive(const std::string& input_path,
+                           const std::string& destination_path) {
+  if (input_path.find(".zip") != std::string::npos) {
+    return UnzipFile(input_path, destination_path);
+  } else if (input_path.find(".tar") != std::string::npos ||
+             input_path.find(".tar.gz") != std::string::npos) {
+    return UntarFile(input_path, destination_path);
+  } else {
+    LOG_ERROR << "Unsupported file type: " << input_path << "\n";
+    return false;
+  }
+}
+
+inline bool UnzipFile(const std::string& input_zip_path,
+                      const std::string& destination_path) {
+  unzFile zip_file = unzOpen(input_zip_path.c_str());
+  if (!zip_file) {
+    LOG_ERROR << "Failed to open zip file: " << input_zip_path << "\n";
+    return false;
+  }
+
+  std::filesystem::create_directories(destination_path);
+
+  if (unzGoToFirstFile(zip_file) != UNZ_OK) {
+    LOG_ERROR << "Error opening first file in zip" << "\n";
+    unzClose(zip_file);
+    return false;
+  }
+
+  do {
+    unz_file_info file_info;
+    char file_name[256];
+    if (unzGetCurrentFileInfo(zip_file, &file_info, file_name,
+                              sizeof(file_name), nullptr, 0, nullptr,
+                              0) != UNZ_OK) {
+      LOG_ERROR << "Failed to get file info" << "\n";
+      unzClose(zip_file);
+      return false;
+    }
+
+    std::string full_path = destination_path + "/" + file_name;
+
+    if (file_name[strlen(file_name) - 1] == '/') {
+      std::filesystem::create_directories(full_path);
+    } else {
+      std::filesystem::create_directories(
+          std::filesystem::path(full_path).parent_path());
+
+      if (unzOpenCurrentFile(zip_file) != UNZ_OK) {
+        LOG_ERROR << "Failed to open file in zip: " << file_name << "\n";
+        unzClose(zip_file);
+        return false;
+      }
+
+      std::ofstream outFile(full_path, std::ios::binary);
+      if (!outFile.is_open()) {
+        LOG_ERROR << "Failed to create file: " << full_path << "\n";
+        unzCloseCurrentFile(zip_file);
+        unzClose(zip_file);
+        return false;
+      }
+
+      char buffer[8192];
+      int bytes_read;
+      while ((bytes_read =
+                  unzReadCurrentFile(zip_file, buffer, sizeof(buffer))) > 0) {
+        outFile.write(buffer, bytes_read);
+      }
+
+      outFile.close();
+      unzCloseCurrentFile(zip_file);
+    }
+  } while (unzGoToNextFile(zip_file) == UNZ_OK);
+
+  unzClose(zip_file);
+  LOG_INFO << "Extracted successfully " << input_zip_path << " to "
+           << destination_path << "\n";
+  return true;
+}
+
+inline bool UntarFile(const std::string& input_tar_path,
+                      const std::string& destination_path) {
+  struct archive* tar_archive = archive_read_new();
+  archive_read_support_format_tar(tar_archive);
+  archive_read_support_compression_gzip(tar_archive);
+
+  if (archive_read_open_filename(tar_archive, input_tar_path.c_str(), 10240) !=
+      ARCHIVE_OK) {
+    LOG_ERROR << "Failed to open tar file: " << input_tar_path << "\n";
+    archive_read_free(tar_archive);
+    return false;
+  }
+
+  std::filesystem::create_directories(destination_path);
+  struct archive_entry* entry;
+  while (archive_read_next_header(tar_archive, &entry) == ARCHIVE_OK) {
+    const char* current_file = archive_entry_pathname(entry);
+    std::string full_path = destination_path + "/" + current_file;
+
+    if (archive_entry_filetype(entry) == AE_IFDIR) {
+      std::filesystem::create_directories(full_path);
+    } else {
+      std::filesystem::create_directories(
+          std::filesystem::path(full_path).parent_path());
+
+      std::ofstream out_file(full_path, std::ios::binary);
+      if (!out_file.is_open()) {
+        LOG_ERROR << "Failed to create file: " << full_path << "\n";
+        archive_read_free(tar_archive);
+        return false;
+      }
+
+      const void* buff;
+      size_t size;
+      la_int64_t offset;
+      while (archive_read_data_block(tar_archive, &buff, &size, &offset) ==
+             ARCHIVE_OK) {
+        out_file.write(static_cast<const char*>(buff), size);
+      }
+
+      out_file.close();
+    }
+
+    archive_entry_clear(entry);
+  }
+
+  archive_read_free(tar_archive);
+  LOG_INFO << "Extracted successfully " << input_tar_path << " to "
+           << destination_path << "\n";
+  return true;
+}
+} // namespace archive_utils
\ No newline at end of file
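The engine-init flow added later in this series calls straight into ExtractArchive after a release download finishes. Illustrative call (paths are made up):

   // hypothetical paths, for illustration only
   archive_utils::ExtractArchive("./engines/cortex.llamacpp/engine.tar.gz",
                                 "./engines");

Note that the dispatch uses find(".zip")/find(".tar"), which matches anywhere in the path, not just the suffix; a path containing a directory literally named "my.zip.files" would be routed to the zip branch, so a suffix comparison would be the stricter check.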
diff --git a/engine/utils/cortexso_parser.h b/engine/utils/cortexso_parser.h
new file mode 100644
index 000000000..6150e9f0a
--- /dev/null
+++ b/engine/utils/cortexso_parser.h
@@ -0,0 +1,69 @@
+#include <iostream>
+#include <optional>
+#include <sstream>
+#include <string>
+
+#include <nlohmann/json.hpp>
+#include <trantor/utils/Logger.h>
+#include "httplib.h"
+
+namespace cortexso_parser {
+constexpr static auto kHuggingFaceHost = "https://huggingface.co";
+
+inline std::optional<DownloadTask> getDownloadTask(
+    const std::string& modelId, const std::string& branch = "main") {
+  using namespace nlohmann;
+  std::ostringstream oss;
+  oss << "/api/models/cortexso/" << modelId << "/tree/" << branch;
+  const std::string url = oss.str();
+
+  std::ostringstream repoAndModelId;
+  repoAndModelId << "cortexso/" << modelId;
+  const std::string repoAndModelIdStr = repoAndModelId.str();
+
+  httplib::Client cli(kHuggingFaceHost);
+  if (auto res = cli.Get(url)) {
+    if (res->status == httplib::StatusCode::OK_200) {
+      try {
+        auto jsonResponse = json::parse(res->body);
+
+        std::vector<DownloadItem> downloadItems{};
+        for (auto& [key, value] : jsonResponse.items()) {
+          std::ostringstream downloadUrlOutput;
+          auto path = value["path"].get<std::string>();
+          downloadUrlOutput << repoAndModelIdStr << "/resolve/" << branch << "/"
+                            << path;
+          const std::string downloadUrl = downloadUrlOutput.str();
+
+          DownloadItem downloadItem{};
+          downloadItem.id = path;
+          downloadItem.host = kHuggingFaceHost;
+          downloadItem.fileName = path;
+          downloadItem.type = DownloadType::Model;
+          downloadItem.path = downloadUrl;
+          downloadItem.totalSize = value["size"].get<uint64_t>();
+          downloadItem.transferredSize = 0;
+          downloadItem.status = DownloadStatus::Pending;
+          downloadItems.push_back(downloadItem);
+        }
+
+        DownloadTask downloadTask{};
+        downloadTask.id = modelId;
+        downloadTask.type = DownloadType::Model;
+        downloadTask.percentage = 0.0f;
+        downloadTask.status = DownloadStatus::Pending;
+        downloadTask.error = std::nullopt;
+        downloadTask.items = downloadItems;
+
+        return downloadTask;
+      } catch (const nlohmann::json::parse_error& e) {
+        std::cerr << "JSON parse error: " << e.what() << std::endl;
+      }
+    }
+  } else {
+    auto err = res.error();
+    LOG_ERROR << "HTTP error: " << httplib::to_string(err);
+  }
+  return std::nullopt;
+}
+} // namespace cortexso_parser
\ No newline at end of file
diff --git a/engine/utils/http_util.h b/engine/utils/http_util.h
new file mode 100644
index 000000000..73c53668f
--- /dev/null
+++ b/engine/utils/http_util.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <drogon/HttpController.h>
+
+using namespace drogon;
+
+namespace http_util {
+
+bool HasFieldInReq(const HttpRequestPtr& req,
+                   std::function<void(const HttpResponsePtr&)>& callback,
+                   const std::string& field) {
+  if (auto o = req->getJsonObject(); !o || (*o)[field].isNull()) {
+    Json::Value res;
+    res["message"] = "No " + field + " field in request body";
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
+    resp->setStatusCode(k409Conflict);
+    callback(resp);
+    LOG_WARN << "No " << field << " field in request body";
+    return false;
+  }
+  return true;
+}
+
+} // namespace http_util
\ No newline at end of file
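HasFieldInReq is a guard meant to be called first in a drogon handler, exactly as the Models controller added in PATCH 07 below does. A sketch of the call pattern:

   void Models::PullModel(const HttpRequestPtr& req,
                          std::function<void(const HttpResponsePtr&)>&& callback) const {
     if (!http_util::HasFieldInReq(req, callback, "modelId")) {
       return;  // a 409 with an explanatory message has already been sent
     }
     // safe to read (*req->getJsonObject())["modelId"] from here on
   }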
From 35e45f247cdb6879ec01c7103f4b2a5ef1ee706f Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Mon, 26 Aug 2024 11:40:00 +0700
Subject: [PATCH 03/16] fix: CI with vcpkg

---
 .github/workflows/cortex-cpp-quality-gate.yml | 22 +++++++++++--------
 engine/Makefile                               | 16 ++++++++++----
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index b30fb30de..b9515858f 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -5,7 +5,7 @@ on:
     types: [opened, synchronize, reopened]
     paths:
       [
-        "cortex-cpp/**",
+        "engine/**",
       ]
   workflow_dispatch:
 
@@ -24,25 +24,25 @@ jobs:
         - os: "linux"
           name: "amd64"
           runs-on: "ubuntu-20-04"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}}"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
          build-deps-cmake-flags: ""
          ccache-dir: ''
        - os: "mac"
          name: "amd64"
          runs-on: "macos-13"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}}"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
          build-deps-cmake-flags: ""
          ccache-dir: ''
        - os: "mac"
          name: "arm64"
          runs-on: "mac-silicon"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DMAC_ARM64=ON"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DMAC_ARM64=ON -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
          build-deps-cmake-flags: ""
          ccache-dir: ''
        - os: "windows"
          name: "amd64"
          runs-on: "windows-cuda-12-0"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DCMAKE_TOOLCHAIN_FILE=C:/w/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=x64-windows-static -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
          build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
          ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
      steps:
@@ -69,26 +69,30 @@ jobs:
           AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
           AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
 
+      - name: Configure vcpkg
+        run: |
+          make configure-vcpkg
+
       - name: Build
         run: |
-          cd cortex-cpp
+          cd engine
           make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}"
 
       - name: Pre-package
         run: |
-          cd cortex-cpp
+          cd engine
           make pre-package
 
       - name: Package
         run: |
-          cd cortex-cpp
+          cd engine
           make package
 
       - name: Upload Artifact
         uses: actions/upload-artifact@v2
         with:
           name: cortex-cpp-${{ matrix.os }}-${{ matrix.name }}
-          path: ./cortex-cpp/cortex-cpp
+          path: ./engine/cortex-cpp
 
       - name: Upload ccache to s3
         continue-on-error: true
diff --git a/engine/Makefile b/engine/Makefile
index 83c3f61f1..a3fdaa0fd 100644
--- a/engine/Makefile
+++ b/engine/Makefile
@@ -18,19 +18,27 @@ DEVELOPER_ID ?= xxxx
 all:
 	@echo "Specify a target to run"
 
+configure-vcpkg:
+ifeq ($(OS),Windows_NT)
+	@cd engine/vcpkg && bootstrap-vcpkg.bat;
+	@cd engine/vcpkg && vcpkg install
+else ifeq ($(shell uname -s),Linux)
+	@cd engine/vcpkg && ./bootstrap-vcpkg.sh;
+	@cd engine/vcpkg && ./vcpkg install;
+else
+	@cd engine/vcpkg && ./bootstrap-vcpkg.sh;
+	@cd engine/vcpkg && ./vcpkg install;
+endif
+
 # Build the Cortex engine
 build:
 ifeq ($(OS),Windows_NT)
-	@powershell -Command "cmake -S ./cortex-cpp-deps -B ./build-deps/cortex-cpp-deps $(BUILD_DEPS_CMAKE_EXTRA_FLAGS);"
-	@powershell -Command "cmake --build ./build-deps/cortex-cpp-deps --config Release -j4;"
 	@powershell -Command "mkdir -p build; cd build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release -j4;"
 else ifeq ($(shell uname -s),Linux)
-	@./install_deps.sh;
 	@mkdir -p build && cd build; \
 	cmake .. $(CMAKE_EXTRA_FLAGS); \
 	make -j4;
 else
-	@./install_deps.sh;
 	@mkdir -p build && cd build; \
 	cmake .. $(CMAKE_EXTRA_FLAGS); \
 	make -j4;
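The same two-step flow works locally once PATCH 04 below fixes the directory prefixes (as committed here, the recipe says `cd engine/vcpkg`, which only resolves when make runs from the repository root rather than from engine/). A sketch, assuming the vcpkg submodule is checked out at engine/vcpkg:

   cd engine
   make configure-vcpkg    # bootstraps vcpkg and installs manifest dependencies
   make build CMAKE_EXTRA_FLAGS="-DCMAKE_TOOLCHAIN_FILE=$PWD/vcpkg/scripts/buildsystems/vcpkg.cmake"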
From b207d23e3c27ea0a7a2a9b14fc03a98065b1da24 Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Mon, 26 Aug 2024 12:07:50 +0700
Subject: [PATCH 04/16] fix: Makefile

---
 .github/workflows/cortex-cpp-quality-gate.yml |  1 +
 engine/Makefile                               | 12 ++++++------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index b9515858f..97db3ea2d 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -71,6 +71,7 @@ jobs:
 
       - name: Configure vcpkg
         run: |
+          cd engine
           make configure-vcpkg
 
       - name: Build
diff --git a/engine/Makefile b/engine/Makefile
index a3fdaa0fd..ae4a49258 100644
--- a/engine/Makefile
+++ b/engine/Makefile
@@ -20,14 +20,14 @@ all:
 
 configure-vcpkg:
 ifeq ($(OS),Windows_NT)
-	@cd engine/vcpkg && bootstrap-vcpkg.bat;
-	@cd engine/vcpkg && vcpkg install
+	@cd vcpkg && bootstrap-vcpkg.bat;
+	@cd vcpkg && vcpkg install
 else ifeq ($(shell uname -s),Linux)
-	@cd engine/vcpkg && ./bootstrap-vcpkg.sh;
-	@cd engine/vcpkg && ./vcpkg install;
+	@cd vcpkg && ./bootstrap-vcpkg.sh;
+	@cd vcpkg && ./vcpkg install;
 else
-	@cd engine/vcpkg && ./bootstrap-vcpkg.sh;
-	@cd engine/vcpkg && ./vcpkg install;
+	@cd vcpkg && ./bootstrap-vcpkg.sh;
+	@cd vcpkg && ./vcpkg install;
 endif

From 8a04157b845e54bec45c1f88e11c08e4ebc6e9d1 Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Mon, 26 Aug 2024 14:23:12 +0700
Subject: [PATCH 05/16] fix: quality gate

---
 .github/workflows/cortex-cpp-quality-gate.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index 97db3ea2d..85b7f1550 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -23,7 +23,7 @@ jobs:
       include:
         - os: "linux"
           name: "amd64"
-          runs-on: "ubuntu-20-04"
+          runs-on: "ubuntu-20-04-cuda-12-0"
           cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
           build-deps-cmake-flags: ""
           ccache-dir: ''
@@ -35,7 +35,7 @@ jobs:
           ccache-dir: ''
         - os: "mac"
           name: "arm64"
-          runs-on: "mac-silicon"
+          runs-on: "macos-latest"
           cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DMAC_ARM64=ON -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
           build-deps-cmake-flags: ""
           ccache-dir: ''

From c604d3852f71a09311c6b7ef80f04971d54fa78f Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Mon, 26 Aug 2024 14:41:39 +0700
Subject: [PATCH 06/16] fix: CI

---
 .github/workflows/cortex-cpp-quality-gate.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index 85b7f1550..092423821 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -24,25 +24,25 @@ jobs:
         - os: "linux"
           name: "amd64"
           runs-on: "ubuntu-20-04-cuda-12-0"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex/cortex/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
          build-deps-cmake-flags: ""
          ccache-dir: ''
        - os: "mac"
          name: "amd64"
          runs-on: "macos-13"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex/cortex/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
          build-deps-cmake-flags: ""
          ccache-dir: ''
        - os: "mac"
          name: "arm64"
          runs-on: "macos-latest"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DMAC_ARM64=ON -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DMAC_ARM64=ON -DCMAKE_TOOLCHAIN_FILE=/Users/runner/work/cortex/cortex/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
          build-deps-cmake-flags: ""
          ccache-dir: ''
        - os: "windows"
          name: "amd64"
          runs-on: "windows-cuda-12-0"
-          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DCMAKE_TOOLCHAIN_FILE=C:/w/cortex.cpp/cortex.cpp/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=x64-windows-static -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
+          cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DCMAKE_TOOLCHAIN_FILE=C:/w/cortex/cortex/engine/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=x64-windows-static -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
          build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
          ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
      steps:

From 5e2fe08e3e3437e90f377113047274d2fa0c2a97 Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Tue, 27 Aug 2024 10:44:53 +0700
Subject: [PATCH 07/16] feat: model list + post download process + download
 engine

---
 engine/commands/engine_init_cmd.cc        | 109 ++++++++++++++++++++++
 engine/commands/engine_init_cmd.h         |  21 +++++
 engine/commands/model_list_cmd.cc         |  30 ++++++
 engine/commands/model_list_cmd.h          |  11 +++
 engine/commands/model_pull_cmd.cc         |   4 +-
 engine/controllers/command_line_parser.cc |  34 +++++--
 engine/controllers/models.cc              | 104 +++++++++++++++++++++
 engine/controllers/models.h               |  23 +++++
 engine/controllers/server.cc              |   2 +-
 engine/controllers/server.h               |   4 +-
 engine/services/download_service.cc       |  77 ++++++---------
 engine/services/download_service.h        |  21 ++---
 engine/utils/cortex_utils.h               |  14 ++-
 engine/utils/cortexso_parser.h            |   7 +-
 engine/utils/file_manager_utils.h         |  43 +++++++++
 engine/utils/http_util.h                  |   6 +-
 engine/utils/model_callback_utils.h       |  51 ++++++++++
 engine/utils/system_info_utils.h          |  36 +++++++
 18 files changed, 512 insertions(+), 85 deletions(-)
 create mode 100644 engine/commands/engine_init_cmd.cc
 create mode 100644 engine/commands/engine_init_cmd.h
 create mode 100644 engine/commands/model_list_cmd.cc
 create mode 100644 engine/commands/model_list_cmd.h
 create mode 100644 engine/controllers/models.cc
 create mode 100644 engine/controllers/models.h
 create mode 100644 engine/utils/file_manager_utils.h
 create mode 100644 engine/utils/model_callback_utils.h
 create mode 100644 engine/utils/system_info_utils.h
diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc
new file mode 100644
index 000000000..430433e4d
--- /dev/null
+++ b/engine/commands/engine_init_cmd.cc
@@ -0,0 +1,109 @@
+#include "engine_init_cmd.h"
+#include <utility>
+#include "services/download_service.h"
+#include "trantor/utils/Logger.h"
+// clang-format off
+#include "utils/cortexso_parser.h"
+#include "utils/archive_utils.h"
+#include "utils/system_info_utils.h"
+// clang-format on
+
+namespace commands {
+
+EngineInitCmd::EngineInitCmd(std::string engineName, std::string version)
+    : engineName_(std::move(engineName)), version_(std::move(version)) {}
+
+void EngineInitCmd::Exec() const {
+  if (engineName_.empty()) {
+    LOG_ERROR << "Engine name is required";
+    return;
+  }
+
+  // Check if the architecture and OS are supported
+  auto system_info = system_info_utils::GetSystemInfo();
+  if (system_info.arch == system_info_utils::kUnsupported ||
+      system_info.os == system_info_utils::kUnsupported) {
+    LOG_ERROR << "Unsupported OS or architecture: " << system_info.os << ", "
+              << system_info.arch;
+    return;
+  }
+
+  // check if engine is supported
+  if (std::find(supportedEngines_.begin(), supportedEngines_.end(),
+                engineName_) == supportedEngines_.end()) {
+    LOG_ERROR << "Engine not supported";
+    return;
+  }
+
+  constexpr auto gitHubHost = "https://api.github.com";
+
+  std::ostringstream engineReleasePath;
+  engineReleasePath << "/repos/janhq/" << engineName_ << "/releases/"
+                    << version_;
+
+  using namespace nlohmann;
+
+  httplib::Client cli(gitHubHost);
+  if (auto res = cli.Get(engineReleasePath.str())) {
+    if (res->status == httplib::StatusCode::OK_200) {
+      try {
+        auto jsonResponse = json::parse(res->body);
+        auto assets = jsonResponse["assets"];
+        auto os_arch{system_info.os + "-" + system_info.arch};
+
+        for (auto& asset : assets) {
+          auto assetName = asset["name"].get<std::string>();
+          if (assetName.find(os_arch) != std::string::npos) {
+            std::string host{"https://github.com"};
+
+            auto full_url = asset["browser_download_url"].get<std::string>();
+            std::string path = full_url.substr(host.length());
+
+            auto fileName = asset["name"].get<std::string>();
+            LOG_INFO << "URL: " << full_url;
+
+            auto downloadTask = DownloadTask{.id = engineName_,
+                                             .type = DownloadType::Engine,
+                                             .error = std::nullopt,
+                                             .items = {DownloadItem{
+                                                 .id = engineName_,
+                                                 .host = host,
+                                                 .fileName = fileName,
+                                                 .type = DownloadType::Engine,
+                                                 .path = path,
+                                             }}};
+
+            DownloadService().AddDownloadTask(
+                downloadTask,
+                [&downloadTask](const std::string& absolute_path) {
+                  // try to unzip the downloaded file
+                  std::filesystem::path downloadedEnginePath{absolute_path};
+                  LOG_INFO << "Downloaded engine path: "
+                           << downloadedEnginePath.string();
+
+                  archive_utils::ExtractArchive(
+                      downloadedEnginePath.string(),
+                      downloadedEnginePath.parent_path()
+                          .parent_path()
+                          .string());
+
+                  // remove the downloaded file
+                  std::filesystem::remove(absolute_path);
+                  LOG_INFO << "Finished!";
+                });
+
+            return;
+          }
+        }
+        LOG_ERROR << "No asset found for " << os_arch;
+      } catch (const json::parse_error& e) {
+        std::cerr << "JSON parse error: " << e.what() << std::endl;
+      }
+    }
+  } else {
+    auto err = res.error();
+    LOG_ERROR << "HTTP error: " << httplib::to_string(err);
+  }
+}
+
+}; // namespace commands
\ No newline at end of file
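On the CLI this is reached through the engines init subcommand added below. Sketch (binary name assumed):

   cortex engines init -n cortex.llamacpp      # version_ defaults to "latest"

Note that the request path /repos/janhq/<engine>/releases/<version> matches the GitHub REST API only for the literal version "latest"; fetching a specific tag requires the /releases/tags/<tag> form, so explicit -v values should be treated as untested here.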
000000000..09b908e37
--- /dev/null
+++ b/engine/commands/engine_init_cmd.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <array>
+#include <string>
+
+namespace commands {
+
+class EngineInitCmd {
+ public:
+  EngineInitCmd(std::string engineName, std::string version);
+
+  void Exec() const;
+
+ private:
+  std::string engineName_;
+  std::string version_;
+
+  static constexpr std::array<const char*, 1> supportedEngines_ = {
+      "cortex.llamacpp"};
+};
+} // namespace commands
\ No newline at end of file
diff --git a/engine/commands/model_list_cmd.cc b/engine/commands/model_list_cmd.cc
new file mode 100644
index 000000000..2aedd6d24
--- /dev/null
+++ b/engine/commands/model_list_cmd.cc
@@ -0,0 +1,30 @@
+#include "model_list_cmd.h"
+#include <filesystem>
+#include <iostream>
+#include <vector>
+#include "utils/cortex_utils.h"
+#include "config/yaml_config.h"
+#include "trantor/utils/Logger.h"
+namespace commands {
+
+void ModelListCmd::Exec() {
+  if (std::filesystem::exists(cortex_utils::models_folder) &&
+      std::filesystem::is_directory(cortex_utils::models_folder)) {
+    // Iterate through directory
+    for (const auto& entry :
+         std::filesystem::directory_iterator(cortex_utils::models_folder)) {
+      if (entry.is_regular_file() && entry.path().extension() == ".yaml") {
+        try {
+          config::YamlHandler handler;
+          handler.ModelConfigFromFile(entry.path().string());
+          std::cout << "Model ID: " << entry.path().stem().string()
+                    << ", Engine: " << handler.GetModelConfig().engine << std::endl;
+        } catch (const std::exception& e) {
+          LOG_ERROR << "Error reading yaml file '" << entry.path().string()
+                    << "': " << e.what();
+        }
+      }
+    }
+  }
+}
+} // namespace commands
\ No newline at end of file
diff --git a/engine/commands/model_list_cmd.h b/engine/commands/model_list_cmd.h
new file mode 100644
+++ b/engine/commands/model_list_cmd.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <string>
+
+namespace commands {
+
+class ModelListCmd {
+ public:
+  void Exec();
+};
+} // namespace commands
\ No newline at end of file
diff --git a/engine/commands/model_pull_cmd.cc b/engine/commands/model_pull_cmd.cc
index 6135efb43..9dcd8c4ef 100644
--- a/engine/commands/model_pull_cmd.cc
+++ b/engine/commands/model_pull_cmd.cc
@@ -3,6 +3,7 @@
 #include "services/download_service.h"
 #include "trantor/utils/Logger.h"
 #include "utils/cortexso_parser.h"
+#include "utils/model_callback_utils.h"
 
 namespace commands {
 ModelPullCmd::ModelPullCmd(std::string modelHandle)
@@ -12,7 +13,8 @@ void ModelPullCmd::Exec() {
   auto downloadTask = cortexso_parser::getDownloadTask(modelHandle_);
   if (downloadTask.has_value()) {
     DownloadService downloadService;
-    downloadService.AddDownloadTask(downloadTask.value());
+    downloadService.AddDownloadTask(downloadTask.value(),
+                                    model_callback_utils::DownloadModelCb);
     std::cout << "Download finished" << std::endl;
   } else {
     std::cout << "Model not found" << std::endl;
Working with " @@ -60,7 +65,6 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto update_cmd = models_cmd->add_subcommand("update", "Update configuration of a model"); } - //// End of Models group commands auto chat_cmd = app_.add_subcommand("chat", "Send a chat request to a model"); @@ -71,11 +75,27 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { "embeddings", "Creates an embedding vector representing the input text"); // engines group commands - auto engines_cmd = app_.add_subcommand("engines", "Get cortex engines"); - auto list_engines_cmd = - engines_cmd->add_subcommand("list", "List all cortex engines"); - auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine"); - auto init_cmd = engines_cmd->add_subcommand("init", "Setup engine"); + { + auto engines_cmd = app_.add_subcommand("engines", "Get cortex engines"); + auto list_engines_cmd = + engines_cmd->add_subcommand("list", "List all cortex engines"); + auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine"); + + { // Engine init command + auto init_cmd = engines_cmd->add_subcommand("init", "Initialize engine"); + std::string engine_name; + std::string version = "latest"; + + init_cmd->add_option("-n,--name", engine_name, + "Engine name. E.g: cortex.llamacpp"); + init_cmd->add_option("-v,--version", version, + "Engine version. Default will be latest"); + init_cmd->callback([&engine_name, &version]() { + commands::EngineInitCmd eic(engine_name, version); + eic.Exec(); + }); + } + } auto run_cmd = app_.add_subcommand("run", "Shortcut to start a model and chat"); diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc new file mode 100644 index 000000000..e445ff90a --- /dev/null +++ b/engine/controllers/models.cc @@ -0,0 +1,104 @@ +#include "models.h" +#include "config/yaml_config.h" +#include "trantor/utils/Logger.h" +#include "utils/cortex_utils.h" +#include "utils/model_callback_utils.h" + +void Models::PullModel( + const HttpRequestPtr& req, + std::function&& callback) const { + if (!http_util::HasFieldInReq(req, callback, "modelId")) { + return; + } + auto modelHandle = (*(req->getJsonObject())).get("modelId", "").asString(); + LOG_DEBUG << "PullModel, Model handle: " << modelHandle; + if (modelHandle.empty()) { + Json::Value ret; + ret["result"] = "Bad Request"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + + auto downloadTask = cortexso_parser::getDownloadTask(modelHandle); + if (downloadTask.has_value()) { + DownloadService downloadService; + downloadService.AddAsyncDownloadTask(downloadTask.value(), + model_callback_utils::DownloadModelCb); + + Json::Value ret; + ret["result"] = "OK"; + ret["modelHandle"] = modelHandle; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); + } else { + Json::Value ret; + ret["result"] = "Not Found"; + ret["modelHandle"] = modelHandle; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k404NotFound); + callback(resp); + } +} + +void Models::ListModel( + const HttpRequestPtr& req, + std::function&& callback) const { + Json::Value ret; + ret["object"] = "list"; + Json::Value data(Json::arrayValue); + if (std::filesystem::exists(cortex_utils::models_folder) && + std::filesystem::is_directory(cortex_utils::models_folder)) { + // Iterate through directory + for (const auto& entry : + 
diff --git a/engine/controllers/models.h b/engine/controllers/models.h
new file mode 100644
index 000000000..d8f9415e4
--- /dev/null
+++ b/engine/controllers/models.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <drogon/HttpController.h>
+#include <trantor/utils/Logger.h>
+#include "services/download_service.h"
+#include "utils/cortex_utils.h"
+#include "utils/cortexso_parser.h"
+#include "utils/http_util.h"
+
+using namespace drogon;
+
+class Models : public drogon::HttpController<Models> {
+ public:
+  METHOD_LIST_BEGIN
+  METHOD_ADD(Models::PullModel, "/pull", Post);
+  METHOD_ADD(Models::ListModel, "/list", Get);
+  METHOD_LIST_END
+
+  void PullModel(const HttpRequestPtr& req,
+                 std::function<void(const HttpResponsePtr&)>&& callback) const;
+  void ListModel(const HttpRequestPtr& req,
+                 std::function<void(const HttpResponsePtr&)>&& callback) const;
+};
\ No newline at end of file
diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc
index 3de3bc2ba..0c5963d7a 100644
--- a/engine/controllers/server.cc
+++ b/engine/controllers/server.cc
@@ -419,4 +419,4 @@ bool server::HasFieldInReq(
   return true;
 }
 
-} // namespace inferences
+} // namespace inferences
\ No newline at end of file
diff --git a/engine/controllers/server.h b/engine/controllers/server.h
index 1b1360baf..6d811192d 100644
--- a/engine/controllers/server.h
+++ b/engine/controllers/server.h
@@ -20,6 +20,8 @@
 #include "cortex-common/EngineI.h"
 #include "cortex-common/cortexpythoni.h"
 #include "trantor/utils/SerialTaskQueue.h"
+#include "config/yaml_config.h"
+#include "config/gguf_parser.h"
 #include "utils/dylib.h"
 #include "utils/json.hpp"
 #ifndef SERVER_VERBOSE
@@ -155,4 +157,4 @@ class server : public drogon::HttpController<server>,
   std::unordered_map<std::string, EngineInfo> engines_;
   std::string cur_engine_type_;
 };
-}; // namespace inferences
+}; // namespace inferences
\ No newline at end of file
diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc
index c5c970412..4a60a42a8 100644
--- a/engine/services/download_service.cc
+++ b/engine/services/download_service.cc
@@ -1,3 +1,4 @@
+#include <optional>
 #include <filesystem>
 #include <fstream>
 #include <sstream>
@@ -5,82 +6,60 @@
 #include <trantor/utils/Logger.h>
 
 #include "download_service.h"
+#include "utils/file_manager_utils.h"
 
-void DownloadService::AddDownloadTask(const DownloadTask& task) {
+void DownloadService::AddDownloadTask(const DownloadTask& task,
+                                      std::optional<DownloadItemCb> callback) {
   tasks.push_back(task);
 
   for (const auto& item : task.items) {
-    StartDownloadItem(task.id, item);
+    StartDownloadItem(task.id, item, callback);
   }
 }
 
-void DownloadService::AddAsyncDownloadTask(const DownloadTask& task) {
+void DownloadService::AddAsyncDownloadTask(
+    const DownloadTask& task, std::optional<DownloadItemCb> callback) {
   tasks.push_back(task);
+
   for (const auto& item : task.items) {
     // TODO: maybe apply std::async is better?
-    std::thread([this, task, item]() {
-      this->StartDownloadItem(task.id, item);
+    std::thread([this, task, &callback, item]() {
+      this->StartDownloadItem(task.id, item, callback);
     }).detach();
   }
 }
 
-const std::string DownloadService::GetContainerFolderPath(DownloadType type) {
-  std::filesystem::path container_folder_path;
-
-  switch (type) {
-    case DownloadType::Model: {
-      container_folder_path = std::filesystem::current_path() / "models";
-      break;
-    }
-    case DownloadType::Engine: {
-      container_folder_path = std::filesystem::current_path() / "engines";
-      break;
-    }
-    default: {
-      container_folder_path = std::filesystem::current_path() / "misc";
-      break;
-    }
-  }
+void DownloadService::StartDownloadItem(
+    const std::string& downloadId, const DownloadItem& item,
+    std::optional<DownloadItemCb> callback) {
+  LOG_INFO << "Downloading item: " << downloadId;
 
-  if (!std::filesystem::exists(container_folder_path)) {
-    LOG_INFO << "Creating folder: " << container_folder_path.string() << "\n";
-    std::filesystem::create_directory(container_folder_path);
-  }
+  auto containerFolderPath{file_manager_utils::GetContainerFolderPath(
+      file_manager_utils::downloadTypeToString(item.type))};
+  LOG_INFO << "Container folder path: " << containerFolderPath.string() << "\n";
 
-  return container_folder_path.string();
-}
-
-void DownloadService::StartDownloadItem(const std::string& downloadId,
-                                        const DownloadItem& item,
-                                        const DownloadItemCb& callback) {
-  LOG_INFO << "Downloading item: " << downloadId;
-  const std::string containerFolderPath = GetContainerFolderPath(item.type);
-  LOG_INFO << "Container folder path: " << containerFolderPath << "\n";
-  const std::filesystem::path itemFolderPath =
-      std::filesystem::path(containerFolderPath) /
-      std::filesystem::path(downloadId);
+  auto itemFolderPath{containerFolderPath / std::filesystem::path(downloadId)};
+  LOG_INFO << "itemFolderPath: " << itemFolderPath.string();
   if (!std::filesystem::exists(itemFolderPath)) {
     LOG_INFO << "Creating " << itemFolderPath.string();
     std::filesystem::create_directory(itemFolderPath);
   }
 
-  LOG_INFO << "itemFolderPath: " << itemFolderPath.string();
-  auto outputFilePath = itemFolderPath / std::filesystem::path(item.fileName);
+  auto outputFilePath{itemFolderPath / std::filesystem::path(item.fileName)};
   LOG_INFO << "Absolute file output: " << outputFilePath.string();
 
   uint64_t last = 0;
   uint64_t tot = 0;
   std::ofstream outputFile(outputFilePath, std::ios::binary);
 
-  std::ostringstream downloadUrl;
-  downloadUrl << item.host << "/" << item.path;
-  LOG_INFO << "Downloading url: " << downloadUrl.str();
+  auto downloadUrl{item.host + "/" + item.path};
+  LOG_INFO << "Downloading url: " << downloadUrl;
 
   httplib::Client client(item.host);
 
   client.set_follow_location(true);
   client.Get(
-      downloadUrl.str(),
+      downloadUrl,
       [](const httplib::Response& res) {
         if (res.status != httplib::StatusCode::OK_200) {
           LOG_ERROR << "HTTP error: " << res.reason;
@@ -93,19 +72,21 @@ void DownloadService::StartDownloadItem(const std::string& downloadId,
         outputFile.write(data, data_length);
         return true;
       },
-      [&last, this](uint64_t current, uint64_t total) {
+      [&last, &outputFile, &callback, outputFilePath, this](uint64_t current,
+                                                            uint64_t total) {
         if (current - last > kUpdateProgressThreshold) {
           last = current;
           LOG_INFO << "Downloading: " << current << " / " << total;
         }
         if (current == total) {
+          outputFile.flush();
           LOG_INFO << "Done download: "
                    << static_cast<double>(total) / 1024 / 1024 << " MiB";
+          if (callback.has_value()) {
+            callback.value()(outputFilePath.string());
+          }
           return false;
         }
         return true;
       });
-  if (callback) {
-    callback(outputFilePath.string());
-  }
 }
\ No newline at end of file
diff --git a/engine/services/download_service.h b/engine/services/download_service.h
index e7fc14b6d..86aefeb52 100644
--- a/engine/services/download_service.h
+++ b/engine/services/download_service.h
@@ -1,8 +1,8 @@
 #pragma once
 
+#include <functional>
 #include <optional>
 #include <string>
-#include "httplib.h"
 
 enum class DownloadType { Model, Engine, Miscellaneous };
 
@@ -24,20 +24,12 @@ struct DownloadItem {
 
   std::string path;
 
-  uint64_t totalSize;
-
-  uint64_t transferredSize;
-
-  DownloadStatus status;
-
   std::optional<std::string> checksum;
 };
 
 struct DownloadTask {
   std::string id;
   DownloadType type;
-  float percentage;
-  DownloadStatus status;
   std::optional<std::string> error;
   std::vector<DownloadItem> items;
 };
@@ -50,9 +42,12 @@ class DownloadService {
    * @param task
    */
   using DownloadItemCb = std::function<void(const std::string&)>;
-  void AddDownloadTask(const DownloadTask& task);
+  void AddDownloadTask(const DownloadTask& task,
+                       std::optional<DownloadItemCb> callback = std::nullopt);
 
-  void AddAsyncDownloadTask(const DownloadTask& task);
+  void AddAsyncDownloadTask(
+      const DownloadTask& task,
+      std::optional<DownloadItemCb> callback = std::nullopt);
 
   // TODO: [NamH] implement the following methods
   //  void removeTask(const std::string &id);
@@ -63,9 +58,7 @@ class DownloadService {
  private:
   void StartDownloadItem(const std::string& downloadId,
                          const DownloadItem& item,
-                         const DownloadItemCb& callback = nullptr);
-
-  const std::string GetContainerFolderPath(DownloadType type);
+                         std::optional<DownloadItemCb> callback = std::nullopt);
 
   // store tasks so we can abort it later
   std::vector<DownloadTask> tasks;
diff --git a/engine/utils/cortex_utils.h b/engine/utils/cortex_utils.h
index 6f8a89658..777cd6d84 100644
--- a/engine/utils/cortex_utils.h
+++ b/engine/utils/cortex_utils.h
@@ -7,13 +7,16 @@
 #include <fstream>
 #include <iostream>
 #include <regex>
-#include "cstdio"
-#include "random"
-#include "string"
+#include <cstdio>
+#include <random>
+#include <string>
+
 // Include platform-specific headers
 #ifdef _WIN32
 #include <direct.h>
 #include <io.h>
+#include <windows.h>
+#define mkdir _mkdir
 #else
 #include <dirent.h>
 #include <unistd.h>
@@ -31,6 +34,9 @@ constexpr static auto kOnnxLibPath = "/engines/cortex.onnx";
 constexpr static auto kTensorrtLlmPath = "/engines/cortex.tensorrt-llm";
 
 inline std::string models_folder = "./models";
+inline std::string logs_folder = "./logs";
+inline std::string logs_base_name = "./logs/cortex";
+inline size_t log_file_size_limit = 20000000; // ~20 mb
 
 inline std::string extractBase64(const std::string& input) {
   std::regex pattern("base64,(.*)");
@@ -336,4 +342,4 @@ inline std::string GetCurrentPath() {
 }
 #endif
 
-} // namespace cortex_utils
+} // namespace cortex_utils
\ No newline at end of file
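One caveat in the async path above: the detached std::thread captures `callback` by reference while AddAsyncDownloadTask takes it by value, so the lambda can outlive the parameter it refers to once the function returns. Capturing the optional by value is the safer shape; a sketch of the idea, not a drop-in hunk:

   // copy the optional callback into the detached thread instead of referencing it
   std::thread([this, task, callback, item]() {
     this->StartDownloadItem(task.id, item, callback);
   }).detach();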
diff --git a/engine/utils/cortexso_parser.h b/engine/utils/cortexso_parser.h
index 6150e9f0a..04f6e7fa5 100644
--- a/engine/utils/cortexso_parser.h
+++ b/engine/utils/cortexso_parser.h
@@ -41,22 +41,17 @@ inline std::optional<DownloadTask> getDownloadTask(
           downloadItem.fileName = path;
           downloadItem.type = DownloadType::Model;
           downloadItem.path = downloadUrl;
-          downloadItem.totalSize = value["size"].get<uint64_t>();
-          downloadItem.transferredSize = 0;
-          downloadItem.status = DownloadStatus::Pending;
           downloadItems.push_back(downloadItem);
         }
 
         DownloadTask downloadTask{};
         downloadTask.id = modelId;
         downloadTask.type = DownloadType::Model;
-        downloadTask.percentage = 0.0f;
-        downloadTask.status = DownloadStatus::Pending;
         downloadTask.error = std::nullopt;
         downloadTask.items = downloadItems;
 
         return downloadTask;
-      } catch (const nlohmann::json::parse_error& e) {
+      } catch (const json::parse_error& e) {
         std::cerr << "JSON parse error: " << e.what() << std::endl;
       }
     }
diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h
new file mode 100644
index 000000000..77c6b74a6
--- /dev/null
+++ b/engine/utils/file_manager_utils.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <filesystem>
+#include <string_view>
+#include <trantor/utils/Logger.h>
+
+namespace file_manager_utils {
+
+inline std::filesystem::path GetContainerFolderPath(
+    const std::string_view type) {
+  const auto current_path{std::filesystem::current_path()};
+  auto container_folder_path = std::filesystem::path{};
+
+  if (type == "Model") {
+    container_folder_path = current_path / "models";
+  } else if (type == "Engine") {
+    container_folder_path = current_path / "engines";
+  } else {
+    container_folder_path = current_path / "misc";
+  }
+
+  if (!std::filesystem::exists(container_folder_path)) {
+    LOG_INFO << "Creating folder: " << container_folder_path.string() << "\n";
+    std::filesystem::create_directory(container_folder_path);
+  }
+
+  return container_folder_path;
+}
+
+inline std::string downloadTypeToString(DownloadType type) {
+  switch (type) {
+    case DownloadType::Model:
+      return "Model";
+    case DownloadType::Engine:
+      return "Engine";
+    case DownloadType::Miscellaneous:
+      return "Misc";
+    default:
+      return "UNKNOWN";
+  }
+}
+
+} // namespace file_manager_utils
\ No newline at end of file
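StartDownloadItem resolves its output directory through these two helpers: DownloadType is stringified, then mapped to a subfolder of the current working directory, created on first use. Sketch:

   auto folder = file_manager_utils::GetContainerFolderPath(
       file_manager_utils::downloadTypeToString(DownloadType::Engine));
   // -> <cwd>/engines (created if missing); Model -> <cwd>/models, anything else -> <cwd>/misc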
diff --git a/engine/utils/http_util.h b/engine/utils/http_util.h
index 73c53668f..471ef3b27 100644
--- a/engine/utils/http_util.h
+++ b/engine/utils/http_util.h
@@ -6,9 +6,9 @@ using namespace drogon;
 
 namespace http_util {
 
-bool HasFieldInReq(const HttpRequestPtr& req,
-                   std::function<void(const HttpResponsePtr&)>& callback,
-                   const std::string& field) {
+inline bool HasFieldInReq(const HttpRequestPtr& req,
+                          std::function<void(const HttpResponsePtr&)>& callback,
+                          const std::string& field) {
   if (auto o = req->getJsonObject(); !o || (*o)[field].isNull()) {
     Json::Value res;
     res["message"] = "No " + field + " field in request body";
diff --git a/engine/utils/model_callback_utils.h b/engine/utils/model_callback_utils.h
new file mode 100644
index 000000000..753fdb205
--- /dev/null
+++ b/engine/utils/model_callback_utils.h
@@ -0,0 +1,51 @@
+#pragma once
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <trantor/utils/Logger.h>
+
+#include "config/gguf_parser.h"
+#include "config/yaml_config.h"
+#include "utils/file_manager_utils.h"
+
+namespace model_callback_utils {
+inline void DownloadModelCb(const std::string& path) {
+
+  std::filesystem::path path_obj(path);
+  std::string filename(path_obj.filename().string());
+  // TODO: handle many cases of downloaded items from other sources except cortexso.
+  if (filename.compare("model.yml") == 0) {
+    config::YamlHandler handler;
+    handler.ModelConfigFromFile(path);
+    config::ModelConfig model_config = handler.GetModelConfig();
+    model_config.id = path_obj.parent_path().filename().string();
+
+    LOG_INFO << "Updating model config in " << path;
+    handler.UpdateModelConfig(model_config);
+    handler.WriteYamlFile(path_obj.parent_path().parent_path().string() + "/" +
+                          model_config.id + ".yaml");
+  }
+  // currently, only handle downloaded model with only 1 .gguf file
+  // TODO: handle multipart gguf file or different model in 1 repo.
+  else if (path_obj.extension().string().compare(".gguf") == 0) {
+
+    config::GGUFHandler gguf_handler;
+    config::YamlHandler yaml_handler;
+    gguf_handler.Parse(path);
+    config::ModelConfig model_config = gguf_handler.GetModelConfig();
+    model_config.id = path_obj.parent_path().filename().string();
+    model_config.files = {path};
+    yaml_handler.UpdateModelConfig(model_config);
+    std::string yml_path(path_obj.parent_path().parent_path().string() + "/" +
+                         model_config.id + ".yaml");
+    std::string yaml_path(path_obj.parent_path().string() + "/model.yml");
+    if (!std::filesystem::exists(yml_path)) {  // if <id>.yaml doesn't exist
+      yaml_handler.WriteYamlFile(yml_path);
+    }
+    if (!std::filesystem::exists(yaml_path)) {  // if model.yml doesn't exist
+      yaml_handler.WriteYamlFile(yaml_path);
+    }
+  }
+}
+} // namespace model_callback_utils
\ No newline at end of file
diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h
new file mode 100644
index 000000000..184428751
--- /dev/null
+++ b/engine/utils/system_info_utils.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <sstream>
+
+namespace system_info_utils {
+struct SystemInfo {
+  std::string os;
+  std::string arch;
+};
+
+constexpr static auto kUnsupported{"Unsupported"};
+
+inline SystemInfo GetSystemInfo() {
+  std::ostringstream arch;
+  std::ostringstream os;
+
+#if defined(__i386__) || defined(__x86_64__)
+  arch << "amd64";
+#elif defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
+  arch << "arm64";
+#else
+  arch << kUnsupported;
+#endif
+
+#if defined(__APPLE__) && defined(__MACH__)
+  os << "mac";
+#elif defined(__linux__)
+  os << "linux";
+#elif defined(_WIN32)
+  os << "windows";
+#else
+  os << kUnsupported;
+#endif
+  return SystemInfo{os.str(), arch.str()};
+}
+} // namespace system_info_utils
\ No newline at end of file
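EngineInitCmd earlier in this patch keys GitHub release assets off this pair, concatenated as os-arch. Sketch:

   auto si = system_info_utils::GetSystemInfo();
   auto os_arch = si.os + "-" + si.arch;  // e.g. "linux-amd64", "mac-arm64"
   // EngineInitCmd::Exec downloads the release asset whose name contains os_arch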
+ [&]() { asyncFileLogger.flush(); }); + asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); + // Check if this process is for python execution if (argc > 1) { if (strcmp(argv[1], "--run_python_file") == 0) { @@ -41,11 +55,17 @@ int main(int argc, char* argv[]) { } } + if (argc > 1) { + CommandLineParser clp; + clp.SetupCommand(argc, argv); + return 0; + } + int thread_num = 1; std::string host = "127.0.0.1"; int port = 3928; - // Number of cortex-cpp threads + // Number of cortex.cpp threads if (argc > 1) { thread_num = std::atoi(argv[1]); } @@ -64,9 +84,9 @@ int main(int argc, char* argv[]) { int drogon_thread_num = std::max(thread_num, logical_cores); // cortex_utils::nitro_logo(); #ifdef CORTEX_CPP_VERSION - LOG_INFO << "cortex-cpp version: " << CORTEX_CPP_VERSION; + LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION; #else - LOG_INFO << "cortex-cpp version: undefined"; + LOG_INFO << "cortex.cpp version: undefined"; #endif LOG_INFO << "Server started, listening at: " << host << ":" << port; @@ -78,4 +98,4 @@ int main(int argc, char* argv[]) { drogon::app().run(); return 0; -} +} \ No newline at end of file From 95f7d67c85a897234be999fab45890beb5e5999d Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Wed, 28 Aug 2024 10:24:40 +0700 Subject: [PATCH 09/16] feat: chat command (#1032) --- engine/commands/chat_cmd.cc | 120 ++++++++++++++++++++++ engine/commands/chat_cmd.h | 19 ++++ engine/controllers/command_line_parser.cc | 20 +++- engine/main.cc | 25 ++--- 4 files changed, 171 insertions(+), 13 deletions(-) create mode 100644 engine/commands/chat_cmd.cc create mode 100644 engine/commands/chat_cmd.h diff --git a/engine/commands/chat_cmd.cc b/engine/commands/chat_cmd.cc new file mode 100644 index 000000000..185dd60fe --- /dev/null +++ b/engine/commands/chat_cmd.cc @@ -0,0 +1,120 @@ +#include "chat_cmd.h" +#include "httplib.h" + +#include "trantor/utils/Logger.h" + +namespace commands { +namespace { +constexpr const char* kExitChat = "exit()"; +constexpr const auto kMinDataChunkSize = 6u; +constexpr const char* kUser = "user"; +constexpr const char* kAssistant = "assistant"; + +} // namespace + +struct ChunkParser { + std::string content; + bool is_done = false; + + ChunkParser(const char* data, size_t data_length) { + if (data && data_length > kMinDataChunkSize) { + std::string s(data + kMinDataChunkSize, data_length - kMinDataChunkSize); + if (s.find("[DONE]") != std::string::npos) { + is_done = true; + } else { + content = nlohmann::json::parse(s)["choices"][0]["delta"]["content"]; + } + } + } +}; + +ChatCmd::ChatCmd(std::string host, int port, const config::ModelConfig& mc) + : host_(std::move(host)), port_(port), mc_(mc) {} + +void ChatCmd::Exec(std::string msg) { + auto address = host_ + ":" + std::to_string(port_); + // Check if model is loaded + { + httplib::Client cli(address); + nlohmann::json json_data; + json_data["model"] = mc_.name; + json_data["engine"] = mc_.engine; + + auto data_str = json_data.dump(); + + // TODO: move this to another message? 
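+ // Ask the server whether this model is already loaded: a non-200 reply means it is not ready, so log the server's message and return before starting the chat loop.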
+ auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(), + data_str.data(), data_str.size(), "application/json"); + if (res) { + if (res->status != httplib::StatusCode::OK_200) { + LOG_INFO << res->body; + return; + } + } else { + auto err = res.error(); + LOG_WARN << "HTTP error: " << httplib::to_string(err); + return; + } + } + // Print a short instruction for the user + std::cout << "In order to exit, type exit()" << std::endl; + // Model is loaded, start to chat + { + while (true) { + // The first turn consumes the message passed via -m (if any); after the move, msg is empty and later turns read from stdin. + std::string user_input = std::move(msg); + std::cout << "> "; + if (user_input.empty()) { + std::getline(std::cin, user_input); + } + if (user_input == kExitChat) { + break; + } + + if (!user_input.empty()) { + httplib::Client cli(address); + nlohmann::json json_data; + nlohmann::json new_data; + new_data["role"] = kUser; + new_data["content"] = user_input; + histories_.push_back(std::move(new_data)); + json_data["engine"] = mc_.engine; + json_data["messages"] = histories_; + json_data["model"] = mc_.name; + // TODO: support non-streaming responses + json_data["stream"] = true; + json_data["stop"] = mc_.stop; + auto data_str = json_data.dump(); + // std::cout << data_str << std::endl; + cli.set_read_timeout(std::chrono::seconds(60)); + // std::cout << "> "; + httplib::Request req; + req.headers = httplib::Headers(); + req.set_header("Content-Type", "application/json"); + req.method = "POST"; + req.path = "/v1/chat/completions"; + req.body = data_str; + std::string ai_chat; + req.content_receiver = [&](const char* data, size_t data_length, + uint64_t offset, uint64_t total_length) { + ChunkParser cp(data, data_length); + if (cp.is_done) { + std::cout << std::endl; + return false; + } + std::cout << cp.content; + ai_chat += cp.content; + return true; + }; + cli.send(req); + + nlohmann::json ai_res; + ai_res["role"] = kAssistant; + ai_res["content"] = ai_chat; + histories_.push_back(std::move(ai_res)); + } + // std::cout << "ok Done" << std::endl; + } + } +} + +}; // namespace commands \ No newline at end of file diff --git a/engine/commands/chat_cmd.h b/engine/commands/chat_cmd.h new file mode 100644 index 000000000..d5b48927c --- /dev/null +++ b/engine/commands/chat_cmd.h @@ -0,0 +1,19 @@ +#pragma once +#include <string> +#include <vector> +#include "config/model_config.h" +#include "nlohmann/json.hpp" + +namespace commands { +class ChatCmd { + public: + ChatCmd(std::string host, int port, const config::ModelConfig& mc); + void Exec(std::string msg); + + private: + std::string host_; + int port_; + const config::ModelConfig& mc_; + std::vector<nlohmann::json> histories_; +}; +} // namespace commands \ No newline at end of file diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index d58760433..48c63611d 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -5,6 +5,7 @@ #include "commands/start_model_cmd.h" #include "commands/stop_model_cmd.h" #include "commands/stop_server_cmd.h" +#include "commands/chat_cmd.h" #include "config/yaml_config.h" #include "utils/cortex_utils.h" @@ -66,7 +67,24 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { models_cmd->add_subcommand("update", "Update configuration of a model"); } - auto chat_cmd = app_.add_subcommand("chat", "Send a chat request to a model"); + { + auto chat_cmd = + app_.add_subcommand("chat", "Send a chat request to a model"); + std::string model_id; + chat_cmd->add_option("model_id", model_id, ""); + std::string msg; + chat_cmd->add_option("-m,--message", msg, + "Message to chat 
with model"); + + chat_cmd->callback([&model_id, &msg] { + // TODO(sang) switch to .yaml when implement model manager + config::YamlHandler yaml_handler; + yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() + + "/models/" + model_id + "/model.yml"); + commands::ChatCmd cc("127.0.0.1", 3928, yaml_handler.GetModelConfig()); + cc.Exec(msg); + }); + } auto ps_cmd = app_.add_subcommand("ps", "Show running models and their status"); diff --git a/engine/main.cc b/engine/main.cc index 27591d48a..a92e114fb 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -22,18 +22,7 @@ #endif int main(int argc, char* argv[]) { - // Create logs/ folder and setup log to file - std::filesystem::create_directory(cortex_utils::logs_folder); - trantor::AsyncFileLogger asyncFileLogger; - asyncFileLogger.setFileName(cortex_utils::logs_base_name); - asyncFileLogger.startLogging(); - trantor::Logger::setOutputFunction( - [&](const char* msg, const uint64_t len) { - asyncFileLogger.output(msg, len); - }, - [&]() { asyncFileLogger.flush(); }); - asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); - + // Check if this process is for python execution if (argc > 1) { if (strcmp(argv[1], "--run_python_file") == 0) { @@ -61,6 +50,18 @@ int main(int argc, char* argv[]) { return 0; } + // Create logs/ folder and setup log to file + std::filesystem::create_directory(cortex_utils::logs_folder); + trantor::AsyncFileLogger asyncFileLogger; + asyncFileLogger.setFileName(cortex_utils::logs_base_name); + asyncFileLogger.startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + asyncFileLogger.output(msg, len); + }, + [&]() { asyncFileLogger.flush(); }); + asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); + int thread_num = 1; std::string host = "127.0.0.1"; int port = 3928; From 287c75065767fdc270db24c421196a07a5b0c425 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 27 Aug 2024 14:35:47 +0700 Subject: [PATCH 10/16] feat: add engine init cli --- engine/commands/engine_init_cmd.cc | 47 +++- engine/commands/engine_init_cmd.h | 4 +- engine/controllers/command_line_parser.cc | 41 ++-- engine/controllers/command_line_parser.h | 2 + engine/controllers/engines.cc | 111 +++++++++ engine/controllers/engines.h | 21 ++ engine/main.cc | 5 +- engine/utils/command_executor.h | 49 ++++ engine/utils/engine_matcher_utils.h | 180 ++++++++++++++ engine/utils/system_info_utils.h | 273 +++++++++++++++++++++- 10 files changed, 696 insertions(+), 37 deletions(-) create mode 100644 engine/controllers/engines.cc create mode 100644 engine/controllers/engines.h create mode 100644 engine/utils/command_executor.h create mode 100644 engine/utils/engine_matcher_utils.h diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc index 430433e4d..b4f8fe064 100644 --- a/engine/commands/engine_init_cmd.cc +++ b/engine/commands/engine_init_cmd.cc @@ -7,6 +7,7 @@ #include "utils/archive_utils.h" #include "utils/system_info_utils.h" // clang-format on +#include "utils/engine_matcher_utils.h" namespace commands { @@ -27,6 +28,7 @@ void EngineInitCmd::Exec() const { << system_info.arch; return; } + LOG_INFO << "OS: " << system_info.os << ", Arch: " << system_info.arch; // check if engine is supported if (std::find(supportedEngines_.begin(), supportedEngines_.end(), @@ -36,11 +38,11 @@ void EngineInitCmd::Exec() const { } constexpr auto gitHubHost = "https://api.github.com"; - + std::string version = version_.empty() ? 
"latest" : version_; std::ostringstream engineReleasePath; engineReleasePath << "/repos/janhq/" << engineName_ << "/releases/" - << version_; - + << version; + LOG_INFO << "Engine release path: " << gitHubHost << engineReleasePath.str(); using namespace nlohmann; httplib::Client cli(gitHubHost); @@ -51,9 +53,37 @@ void EngineInitCmd::Exec() const { auto assets = jsonResponse["assets"]; auto os_arch{system_info.os + "-" + system_info.arch}; + std::vector variants; + for (auto& asset : assets) { + auto asset_name = asset["name"].get(); + variants.push_back(asset_name); + } + + auto cuda_version = system_info_utils::GetCudaVersion(); + LOG_INFO << "engineName_: " << engineName_; + LOG_INFO << "CUDA version: " << cuda_version; + std::string matched_variant = ""; + if (engineName_ == "cortex.tensorrt-llm") { + matched_variant = engine_matcher_utils::ValidateTensorrtLlm( + variants, system_info.os, cuda_version); + } else if (engineName_ == "cortex.onnx") { + matched_variant = engine_matcher_utils::ValidateOnnx( + variants, system_info.os, system_info.arch); + } else if (engineName_ == "cortex.llamacpp") { + auto suitable_avx = engine_matcher_utils::GetSuitableAvxVariant(); + matched_variant = engine_matcher_utils::Validate( + variants, system_info.os, system_info.arch, suitable_avx, + cuda_version); + } + LOG_INFO << "Matched variant: " << matched_variant; + if (matched_variant.empty()) { + LOG_ERROR << "No variant found for " << os_arch; + return; + } + for (auto& asset : assets) { auto assetName = asset["name"].get(); - if (assetName.find(os_arch) != std::string::npos) { + if (assetName == matched_variant) { std::string host{"https://github.com"}; auto full_url = asset["browser_download_url"].get(); @@ -74,8 +104,7 @@ void EngineInitCmd::Exec() const { }}}; DownloadService().AddDownloadTask( - downloadTask, - [&downloadTask](const std::string& absolute_path) { + downloadTask, [](const std::string& absolute_path) { // try to unzip the downloaded file std::filesystem::path downloadedEnginePath{absolute_path}; LOG_INFO << "Downloaded engine path: " @@ -95,15 +124,15 @@ void EngineInitCmd::Exec() const { return; } } - LOG_ERROR << "No asset found for " << os_arch; } catch (const json::parse_error& e) { std::cerr << "JSON parse error: " << e.what() << std::endl; } + } else { + LOG_ERROR << "HTTP error: " << res->status; } } else { auto err = res.error(); LOG_ERROR << "HTTP error: " << httplib::to_string(err); } } - -}; // namespace commands \ No newline at end of file +}; // namespace commands diff --git a/engine/commands/engine_init_cmd.h b/engine/commands/engine_init_cmd.h index 09b908e37..dc75d5cf6 100644 --- a/engine/commands/engine_init_cmd.h +++ b/engine/commands/engine_init_cmd.h @@ -15,7 +15,7 @@ class EngineInitCmd { std::string engineName_; std::string version_; - static constexpr std::array supportedEngines_ = { - "cortex.llamacpp"}; + static constexpr std::array supportedEngines_ = { + "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"}; }; } // namespace commands \ No newline at end of file diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index d58760433..e409ab90a 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -1,7 +1,7 @@ #include "command_line_parser.h" #include "commands/engine_init_cmd.h" -#include "commands/model_pull_cmd.h" #include "commands/model_list_cmd.h" +#include "commands/model_pull_cmd.h" #include "commands/start_model_cmd.h" #include 
"commands/stop_model_cmd.h" #include "commands/stop_server_cmd.h" @@ -44,7 +44,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto list_models_cmd = models_cmd->add_subcommand("list", "List all models locally"); - list_models_cmd->callback([](){ + list_models_cmd->callback([]() { commands::ModelListCmd command; command.Exec(); }); @@ -74,27 +74,15 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto embeddings_cmd = app_.add_subcommand( "embeddings", "Creates an embedding vector representing the input text"); - // engines group commands - { + { // engines group commands auto engines_cmd = app_.add_subcommand("engines", "Get cortex engines"); auto list_engines_cmd = engines_cmd->add_subcommand("list", "List all cortex engines"); auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine"); - { // Engine init command - auto init_cmd = engines_cmd->add_subcommand("init", "Initialize engine"); - std::string engine_name; - std::string version = "latest"; - - init_cmd->add_option("-n,--name", engine_name, - "Engine name. E.g: cortex.llamacpp"); - init_cmd->add_option("-v,--version", version, - "Engine version. Default will be latest"); - init_cmd->callback([&engine_name, &version]() { - commands::EngineInitCmd eic(engine_name, version); - eic.Exec(); - }); - } + EngineInstall(engines_cmd, "cortex.llamacpp"); + EngineInstall(engines_cmd, "cortex.onnx"); + EngineInstall(engines_cmd, "cortex.tensorrt-llm"); } auto run_cmd = @@ -110,4 +98,21 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { CLI11_PARSE(app_, argc, argv); return true; +} + +void CommandLineParser::EngineInstall(CLI::App* parent, + const std::string& engine_name) { + auto engine_cmd = + parent->add_subcommand(engine_name, "Manage " + engine_name + " engine"); + + // Default version is latest + std::string version{"latest"}; + auto install_cmd = engine_cmd->add_subcommand( + "install", "Install " + engine_name + " engine"); + install_cmd->add_option("-v, --version", version, + "Engine version. 
Default will be latest"); + install_cmd->callback([&engine_name, &version] { + commands::EngineInitCmd eic(engine_name, version); + eic.Exec(); + }); } \ No newline at end of file diff --git a/engine/controllers/command_line_parser.h b/engine/controllers/command_line_parser.h index 3324d45e0..e48ed31b0 100644 --- a/engine/controllers/command_line_parser.h +++ b/engine/controllers/command_line_parser.h @@ -9,5 +9,7 @@ class CommandLineParser { bool SetupCommand(int argc, char** argv); private: + void EngineInstall(CLI::App* parent, const std::string& engine_name); + CLI::App app_; }; \ No newline at end of file diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc new file mode 100644 index 000000000..12bea809d --- /dev/null +++ b/engine/controllers/engines.cc @@ -0,0 +1,111 @@ +#include "engines.h" +#include "utils/archive_utils.h" +#include "utils/file_manager_utils.h" +#include "utils/system_info_utils.h" + +void Engines::InitEngine(const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) const { + LOG_DEBUG << "InitEngine, Engine: " << engine; + if (engine.empty()) { + Json::Value res; + res["message"] = "Engine name is required"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k409Conflict); + callback(resp); + LOG_WARN << "No engine field in path param"; + return; + } + + auto system_info = system_info_utils::GetSystemInfo(); + if (system_info.arch == system_info_utils::kUnsupported || + system_info.os == system_info_utils::kUnsupported) { + Json::Value res; + res["message"] = "Unsupported OS or architecture"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k409Conflict); + callback(resp); + LOG_ERROR << "Unsupported OS or architecture: " << system_info.os << ", " + << system_info.arch; + return; + } + + auto version{"latest"}; + constexpr auto gitHubHost = "https://api.github.com"; + + std::ostringstream engineReleasePath; + engineReleasePath << "/repos/janhq/" << engine << "/releases/" << version; + + httplib::Client cli(gitHubHost); + using namespace nlohmann; + if (auto res = cli.Get(engineReleasePath.str())) { + if (res->status == httplib::StatusCode::OK_200) { + try { + auto jsonResponse = json::parse(res->body); + auto assets = jsonResponse["assets"]; + + auto os_arch{system_info.os + "-" + system_info.arch}; + for (auto& asset : assets) { + auto assetName = asset["name"].get(); + if (assetName.find(os_arch) != std::string::npos) { + std::string host{"https://github.com"}; + + auto full_url = asset["browser_download_url"].get(); + std::string path = full_url.substr(host.length()); + + auto fileName = asset["name"].get(); + LOG_INFO << "URL: " << full_url; + + auto downloadTask = DownloadTask{.id = engine, + .type = DownloadType::Engine, + .error = std::nullopt, + .items = {DownloadItem{ + .id = engine, + .host = host, + .fileName = fileName, + .type = DownloadType::Engine, + .path = path, + }}}; + + DownloadService().AddAsyncDownloadTask( + downloadTask, [](const std::string& absolute_path) { + // try to unzip the downloaded file + std::filesystem::path downloadedEnginePath{absolute_path}; + LOG_INFO << "Downloaded engine path: " + << downloadedEnginePath.string(); + + archive_utils::ExtractArchive( + downloadedEnginePath.string(), + downloadedEnginePath.parent_path() + .parent_path() + .string()); + + // remove the downloaded file + std::filesystem::remove(absolute_path); + LOG_INFO << "Finished!"; + }); + + Json::Value res; + res["message"] = "Engine 
download started"; + res["result"] = "OK"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k200OK); + callback(resp); + return; + } + } + Json::Value res; + res["message"] = "Engine not found"; + res["result"] = "Error"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k404NotFound); + callback(resp); + } catch (const json::parse_error& e) { + std::cerr << "JSON parse error: " << e.what() << std::endl; + } + } + } else { + auto err = res.error(); + LOG_ERROR << "HTTP error: " << httplib::to_string(err); + } +} \ No newline at end of file diff --git a/engine/controllers/engines.h b/engine/controllers/engines.h new file mode 100644 index 000000000..282e79402 --- /dev/null +++ b/engine/controllers/engines.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include +#include "services/download_service.h" +#include "utils/cortex_utils.h" +#include "utils/cortexso_parser.h" +#include "utils/http_util.h" + +using namespace drogon; + +class Engines : public drogon::HttpController { + public: + METHOD_LIST_BEGIN + METHOD_ADD(Engines::InitEngine, "/{1}/init", Post); + METHOD_LIST_END + + void InitEngine(const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) const; +}; diff --git a/engine/main.cc b/engine/main.cc index 27591d48a..e8701dd7b 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -1,12 +1,11 @@ #include #include #include // for PATH_MAX -#include #include "controllers/command_line_parser.h" #include "cortex-common/cortexpythoni.h" +#include "utils/archive_utils.h" #include "utils/cortex_utils.h" #include "utils/dylib.h" -#include "utils/archive_utils.h" #if defined(__APPLE__) && defined(__MACH__) #include // for dirname() @@ -98,4 +97,4 @@ int main(int argc, char* argv[]) { drogon::app().run(); return 0; -} \ No newline at end of file +} diff --git a/engine/utils/command_executor.h b/engine/utils/command_executor.h new file mode 100644 index 000000000..9ba13025a --- /dev/null +++ b/engine/utils/command_executor.h @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#define POPEN _popen +#define PCLOSE _pclose +#else +#define POPEN popen +#define PCLOSE pclose +#endif + +class CommandExecutor { + public: + CommandExecutor(const std::string& command) { + FILE* pipe = POPEN(command.c_str(), "r"); + if (!pipe) { + throw std::runtime_error("popen() failed!"); + } + m_pipe = std::unique_ptr(pipe, PCLOSE); + } + + CommandExecutor(const CommandExecutor&) = delete; + CommandExecutor& operator=(const CommandExecutor&) = delete; + CommandExecutor(CommandExecutor&&) = default; + CommandExecutor& operator=(CommandExecutor&&) = default; + ~CommandExecutor() = default; + + std::string execute() { + if (!m_pipe) { + throw std::runtime_error("Command not initialized!"); + } + + std::array buffer; + std::string result; + + while (fgets(buffer.data(), buffer.size(), m_pipe.get()) != nullptr) { + result += buffer.data(); + } + + return result; + } + + private: + std::unique_ptr m_pipe{nullptr, PCLOSE}; +}; \ No newline at end of file diff --git a/engine/utils/engine_matcher_utils.h b/engine/utils/engine_matcher_utils.h new file mode 100644 index 000000000..23c93c1a6 --- /dev/null +++ b/engine/utils/engine_matcher_utils.h @@ -0,0 +1,180 @@ +#include +#include +#include +#include +#include +#include +#include "utils/cpuid/cpu_info.h" + +namespace engine_matcher_utils { +// for testing purpose +const std::vector cortex_llamacpp_variants{ + 
"cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-vulkan.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-mac-arm64.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-vulkan.tar.gz", +}; +const std::vector cortex_onnx_variants{ + "cortex.onnx-0.1.7-windows-amd64.tar.gz"}; + +const std::vector cortex_tensorrt_variants{ + "cortex.tensorrt-llm-0.0.9-linux-cuda-12-4.tar.gz", + "cortex.tensorrt-llm-0.0.9-windows-cuda-12-4.tar.gz"}; + +inline std::string GetSuitableAvxVariant() { + cortex::cpuid::CpuInfo cpu_info; + + LOG_INFO << "GetSuitableAvxVariant:" << "\n" << cpu_info.to_string(); + + if (cpu_info.has_avx512_f()) + return "avx512"; + if (cpu_info.has_avx2()) + return "avx2"; + if (cpu_info.has_avx()) + return "avx"; + return "noavx"; +} + +inline std::string GetSuitableCudaVariant( + const std::vector& variants, const std::string& cuda_version) { + std::regex cuda_reg("cuda-(\\d+)-(\\d+)"); + std::smatch match; + + int requestedMajor = 0; + int requestedMinor = 0; + + if (!cuda_version.empty()) { + // Split the provided CUDA version into major and minor parts + sscanf(cuda_version.c_str(), "%d.%d", &requestedMajor, &requestedMinor); + } + + std::string selectedVariant; + int bestMatchMajor = -1; + int bestMatchMinor = -1; + + for (const auto& variant : variants) { + if (std::regex_search(variant, match, cuda_reg)) { + // Found a CUDA version in the variant + int variantMajor = std::stoi(match[1]); + int variantMinor = std::stoi(match[2]); + + if (requestedMajor == variantMajor) { + // If the major versions match, prefer the closest minor version + if (requestedMinor >= variantMinor && + (variantMajor > bestMatchMajor || + (variantMajor == bestMatchMajor && + variantMinor > bestMatchMinor))) { + selectedVariant = variant; + bestMatchMajor = variantMajor; + bestMatchMinor = variantMinor; + } + } + } else if 
(cuda_version.empty() && selectedVariant.empty()) { + // If no CUDA version is provided, select the variant without any CUDA in the name + selectedVariant = variant; + } + } + + return selectedVariant; +} + +inline std::string ValidateTensorrtLlm(const std::vector& variants, + const std::string& os, + const std::string& cuda_version) { + std::vector os_compatible_list; + std::copy_if(variants.begin(), variants.end(), + std::back_inserter(os_compatible_list), + [&os](const std::string& variant) { + auto os_match = "-" + os; + return variant.find(os_match) != std::string::npos; + }); + auto cuda_compatible = + GetSuitableCudaVariant(os_compatible_list, cuda_version); + return cuda_compatible; +} + +inline std::string ValidateOnnx(const std::vector& variants, + const std::string& os, + const std::string& cpu_arch) { + + std::vector os_and_arch_compatible_list; + std::copy_if(variants.begin(), variants.end(), + std::back_inserter(os_and_arch_compatible_list), + [&os, &cpu_arch](const std::string& variant) { + auto os_match = "-" + os; + auto cpu_arch_match = "-" + cpu_arch; + + return variant.find(os_match) != std::string::npos && + variant.find(cpu_arch_match) != std::string::npos; + }); + if (!os_and_arch_compatible_list.empty()) + return os_and_arch_compatible_list[0]; + return ""; +} + +inline std::string Validate(const std::vector& variants, + const std::string& os, const std::string& cpu_arch, + const std::string& suitable_avx, + const std::string& cuda_version) { + + // Early return if the OS is unsupported + if (os != "mac" && os != "windows" && os != "linux") { + // TODO: throw is better + return ""; + } + + std::vector os_and_arch_compatible_list; + std::copy_if(variants.begin(), variants.end(), + std::back_inserter(os_and_arch_compatible_list), + [&os, &cpu_arch](const std::string& variant) { + auto os_match = "-" + os; + auto cpu_arch_match = "-" + cpu_arch; + + return variant.find(os_match) != std::string::npos && + variant.find(cpu_arch_match) != std::string::npos; + }); + + if (os == "mac" && !os_and_arch_compatible_list.empty()) + return os_and_arch_compatible_list[0]; + + std::vector avx_compatible_list; + + std::copy_if(os_and_arch_compatible_list.begin(), + os_and_arch_compatible_list.end(), + std::back_inserter(avx_compatible_list), + [&suitable_avx](const std::string& variant) { + auto suitable_avx_match = "-" + suitable_avx; + + return variant.find(suitable_avx_match) != std::string::npos; + }); + + auto cuda_compatible = + GetSuitableCudaVariant(avx_compatible_list, cuda_version); + + return cuda_compatible; +} +} // namespace engine_matcher_utils \ No newline at end of file diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index 184428751..d13935295 100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -1,22 +1,64 @@ #pragma once -#include +#include +#include +#include +#include "utils/command_executor.h" +#ifdef _WIN32 +#include +#endif namespace system_info_utils { + +constexpr static auto kUnsupported{"Unsupported"}; +constexpr static auto kCudaVersionRegex{R"(CUDA Version:\s*([\d\.]+))"}; +constexpr static auto kGpuQueryCommand{ + "nvidia-smi --query-gpu=index,memory.total,name,compute_cap " + "--format=csv,noheader,nounits"}; +constexpr static auto kGpuInfoRegex{ + R"((\d+),\s*(\d+),\s*([^,]+),\s*([\d\.]+))"}; + struct SystemInfo { std::string os; std::string arch; }; -constexpr static auto kUnsupported{"Unsupported"}; +/** + * @brief Get the Gpu Arch. Currently we only support Ampere and Ada. 
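+ * Detection is a plain substring match on the marketing name: names containing "30" map to ampere, names containing "40" map to ada, and everything else (including non-NVIDIA names) maps to unknown.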
+ * Might need to come up with better way to detect the GPU architecture. + * + * @param gpuName E.g. NVIDIA GeForce RTX 4090 + * @return corresponding GPU arch. E.g. ampere, ada. + */ +inline std::string GetGpuArch(const std::string& gpuName) { + std::string lowerGpuName = gpuName; + std::transform(lowerGpuName.begin(), lowerGpuName.end(), lowerGpuName.begin(), + ::tolower); + + if (lowerGpuName.find("nvidia") == std::string::npos) { + return "unknown"; + } + + if (gpuName.find("30") != std::string::npos) { + return "ampere"; + } else if (gpuName.find("40") != std::string::npos) { + return "ada"; + } else { + return "unknown"; + } +} inline SystemInfo GetSystemInfo() { std::ostringstream arch; std::ostringstream os; -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__amd64__) || \ + defined(__amd64) || defined(__x86_64) || defined(_M_AMD64) arch << "amd64"; -#elif defined(__arm__) || defined(__arm64__) || defined(__aarch64__) +#elif defined(__arm__) || defined(__arm) || defined(__arm64__) || \ + defined(__aarch64__) || defined(__thumb__) || \ + defined(__TARGET_ARCH_ARM) || defined(__TARGET_ARCH_THUMB) || \ + defined(_ARM) || defined(_M_ARM) || defined(_M_ARMT) arch << "arm64"; #else arch << kUnsupported; @@ -33,4 +75,225 @@ inline SystemInfo GetSystemInfo() { #endif return SystemInfo{os.str(), arch.str()}; } -} // namespace system_info_utils \ No newline at end of file + +constexpr auto vulkan_sample_output = R"( +========== +VULKANINFO +========== + +Vulkan Instance Version: 1.3.280 + + +Instance Extensions: count = 19 +------------------------------- +VK_EXT_debug_report : extension revision 10 +VK_EXT_debug_utils : extension revision 2 +VK_EXT_direct_mode_display : extension revision 1 +VK_EXT_surface_maintenance1 : extension revision 1 +VK_EXT_swapchain_colorspace : extension revision 4 +VK_KHR_device_group_creation : extension revision 1 +VK_KHR_display : extension revision 23 +VK_KHR_external_fence_capabilities : extension revision 1 +VK_KHR_external_memory_capabilities : extension revision 1 +VK_KHR_external_semaphore_capabilities : extension revision 1 +VK_KHR_get_display_properties2 : extension revision 1 +VK_KHR_get_physical_device_properties2 : extension revision 2 +VK_KHR_get_surface_capabilities2 : extension revision 1 +VK_KHR_portability_enumeration : extension revision 1 +VK_KHR_surface : extension revision 25 +VK_KHR_surface_protected_capabilities : extension revision 1 +VK_KHR_win32_surface : extension revision 6 +VK_LUNARG_direct_driver_loading : extension revision 1 +VK_NV_external_memory_capabilities : extension revision 1 + +Instance Layers: count = 1 +-------------------------- +VK_LAYER_NV_optimus NVIDIA Optimus layer 1.3.280 version 1 + +Devices: +======== +GPU0: + apiVersion = 1.3.280 + driverVersion = 560.70.0.0 + vendorID = 0x10de + deviceID = 0x2684 + deviceType = PHYSICAL_DEVICE_TYPE_DISCRETE_GPU + deviceName = NVIDIA GeForce RTX 4090 + driverID = DRIVER_ID_NVIDIA_PROPRIETARY + driverName = NVIDIA + driverInfo = 560.70 + conformanceVersion = 1.3.8.2 + deviceUUID = 11deafdf-9f15-e857-2a87-8acc153fc9f7 + driverUUID = 10f251d9-d3c0-5001-bf67-24bb06423040 +)"; + +constexpr auto gpu_query_list_sample_output = R"( +0, 46068, NVIDIA RTX A6000, 8.6 +1, 46068, NVIDIA RTX A6000, 8.6 +)"; + +constexpr auto nvidia_smi_sample_output = R"( +Sun Aug 25 22:29:25 2024 ++-----------------------------------------------------------------------------------------+ +| NVIDIA-SMI 560.70 Driver Version: 560.70 CUDA Version: 12.6 | 
+|-----------------------------------------+------------------------+----------------------+ +| GPU Name Driver-Model | Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | +| | | MIG M. | +|=========================================+========================+======================| +| 0 NVIDIA GeForce RTX 4090 WDDM | 00000000:01:00.0 Off | Off | +| 0% 24C P8 10W / 500W | 395MiB / 24564MiB | 19% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ + ++-----------------------------------------------------------------------------------------+ +| Processes: | +| GPU GI CI PID Type Process name GPU Memory | +| ID ID Usage | +|=========================================================================================| +| 0 N/A N/A 3984 C+G ...5n1h2txyewy\ShellExperienceHost.exe N/A | +| 0 N/A N/A 7904 C+G ...ekyb3d8bbwe\PhoneExperienceHost.exe N/A | +| 0 N/A N/A 8240 C+G ...__8wekyb3d8bbwe\WindowsTerminal.exe N/A | +| 0 N/A N/A 8904 C+G C:\Windows\explorer.exe N/A | +| 0 N/A N/A 9304 C+G ...siveControlPanel\SystemSettings.exe N/A | +| 0 N/A N/A 9944 C+G ...nt.CBS_cw5n1h2txyewy\SearchHost.exe N/A | +| 0 N/A N/A 11140 C+G ...2txyewy\StartMenuExperienceHost.exe N/A | ++-----------------------------------------------------------------------------------------+ +)"; + +inline bool IsNvidiaSmiAvailable() { +#ifdef _WIN32 + // Check if nvidia-smi.exe exists in the PATH on Windows + char buffer[MAX_PATH]; + if (SearchPath(NULL, "nvidia-smi.exe", NULL, MAX_PATH, buffer, NULL) != 0) { + return true; + } else { + return false; + } +#else + // Check if nvidia-smi is available on Unix-like systems + int result = std::system("which nvidia-smi > /dev/null 2>&1"); + return result == 0; +#endif +} + +inline std::string GetCudaVersion() { + if (!IsNvidiaSmiAvailable()) { + LOG_INFO << "nvidia-smi is not available!"; + return ""; + } + try { + CommandExecutor cmd("nvidia-smi"); + auto output = cmd.execute(); + + const std::regex cuda_version_reg(kCudaVersionRegex); + std::smatch match; + + if (std::regex_search(output, match, cuda_version_reg)) { + LOG_INFO << "CUDA Version: " << match[1].str(); + return match[1].str(); + } else { + LOG_ERROR << "CUDA Version not found!"; + return ""; + } + } catch (const std::exception& e) { + LOG_ERROR << "Error: " << e.what(); + return ""; + } +} + +struct GpuInfo { + std::string id; + std::string vram; + std::string name; + std::string arch; + std::optional<std::string> compute_cap; +}; + +inline std::vector<GpuInfo> GetGpuInfoListVulkan() { + std::vector<GpuInfo> gpuInfoList; + + try { + // NOTE: currently we don't have logic to download vulkaninfoSDK +#ifdef _WIN32 + CommandExecutor cmd("vulkaninfoSDK.exe --summary"); +#else + CommandExecutor cmd("vulkaninfoSDK --summary"); +#endif + auto output = cmd.execute(); + + // Regular expression patterns to match each field + std::regex gpu_block_reg(R"(GPU(\d+):)"); + std::regex field_pattern(R"(\s*(\w+)\s*=\s*(.*))"); + + std::sregex_iterator iter(output.begin(), output.end(), gpu_block_reg); + std::sregex_iterator end; + + while (iter != end) { + GpuInfo gpuInfo; + + // Extract GPU ID from the GPU block pattern (e.g., GPU0 -> id = "0") + gpuInfo.id = (*iter)[1].str(); + + auto gpu_start_pos = iter->position(0) + iter->length(0); + auto gpu_end_pos = std::next(iter) != end ? 
std::next(iter)->position(0) + : std::string::npos; + std::string gpu_block = + output.substr(gpu_start_pos, gpu_end_pos - gpu_start_pos); + + std::sregex_iterator field_iter(gpu_block.begin(), gpu_block.end(), + field_pattern); + + while (field_iter != end) { + std::string key = (*field_iter)[1].str(); + std::string value = (*field_iter)[2].str(); + + if (key == "deviceName") + gpuInfo.name = value; + else if (key == "apiVersion") + gpuInfo.compute_cap = value; + + gpuInfo.vram = ""; // not available + gpuInfo.arch = GetGpuArch(gpuInfo.name); + + ++field_iter; + } + + gpuInfoList.push_back(gpuInfo); + ++iter; + } + } catch (const std::exception& e) {} + + return gpuInfoList; +} + +inline std::vector GetGpuInfoList() { + std::vector gpuInfoList; + + try { + CommandExecutor cmd(kGpuQueryCommand); + auto output = cmd.execute(); + + const std::regex gpu_info_reg(kGpuInfoRegex); + std::smatch match; + std::string::const_iterator search_start(output.cbegin()); + + while ( + std::regex_search(search_start, output.cend(), match, gpu_info_reg)) { + GpuInfo gpuInfo = { + match[1].str(), // id + match[2].str(), // vram + match[3].str(), // name + GetGpuArch(match[3].str()), // arch + match[4].str() // compute_cap + }; + gpuInfoList.push_back(gpuInfo); + search_start = match.suffix().first; + } + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + } + + return gpuInfoList; +} +} // namespace system_info_utils From bbc3e3192f5f426cfa511bf18430964757df4f03 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Wed, 28 Aug 2024 19:39:41 +0700 Subject: [PATCH 11/16] fix: guarantee lifetime for captured variables (#1042) --- .github/workflows/cortex-cpp-quality-gate.yml | 2 +- engine/CMakeLists.txt | 2 +- engine/controllers/command_line_parser.cc | 21 ++++++++++--------- engine/controllers/command_line_parser.h | 3 ++- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml index 092423821..39526d9a4 100644 --- a/.github/workflows/cortex-cpp-quality-gate.yml +++ b/.github/workflows/cortex-cpp-quality-gate.yml @@ -16,7 +16,7 @@ env: jobs: build-and-test: runs-on: ${{ matrix.runs-on }} - timeout-minutes: 40 + timeout-minutes: 60 strategy: fail-fast: false matrix: diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index d18d28f2d..7ba095d6b 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -43,7 +43,7 @@ if(MSVC) $<$:/MT> #--| ) endif() - + if(LLAMA_CUDA) cmake_minimum_required(VERSION 3.17) diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index b4f460261..2c5f79c84 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -12,13 +12,14 @@ CommandLineParser::CommandLineParser() : app_("Cortex.cpp CLI") {} bool CommandLineParser::SetupCommand(int argc, char** argv) { + std::string model_id; + // Models group commands { auto models_cmd = app_.add_subcommand("models", "Subcommands for managing models"); auto start_cmd = models_cmd->add_subcommand("start", "Start a model by ID"); - std::string model_id; start_cmd->add_option("model_id", model_id, ""); start_cmd->callback([&model_id]() { // TODO(sang) switch to .yaml when implement model manager @@ -67,12 +68,12 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { models_cmd->add_subcommand("update", "Update configuration of a model"); } + std::string msg; { auto chat_cmd = 
app_.add_subcommand("chat", "Send a chat request to a model"); - std::string model_id; + chat_cmd->add_option("model_id", model_id, ""); - std::string msg; chat_cmd->add_option("-m,--message", msg, "Message to chat with model"); @@ -92,15 +93,17 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto embeddings_cmd = app_.add_subcommand( "embeddings", "Creates an embedding vector representing the input text"); + // Default version is latest + std::string version{"latest"}; { // engines group commands auto engines_cmd = app_.add_subcommand("engines", "Get cortex engines"); auto list_engines_cmd = engines_cmd->add_subcommand("list", "List all cortex engines"); auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine"); - EngineInstall(engines_cmd, "cortex.llamacpp"); - EngineInstall(engines_cmd, "cortex.onnx"); - EngineInstall(engines_cmd, "cortex.tensorrt-llm"); + EngineInstall(engines_cmd, "cortex.llamacpp", version); + EngineInstall(engines_cmd, "cortex.onnx", version); + EngineInstall(engines_cmd, "cortex.tensorrt-llm", version); } auto run_cmd = @@ -119,17 +122,15 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { } void CommandLineParser::EngineInstall(CLI::App* parent, - const std::string& engine_name) { + const std::string& engine_name, std::string& version) { auto engine_cmd = parent->add_subcommand(engine_name, "Manage " + engine_name + " engine"); - // Default version is latest - std::string version{"latest"}; auto install_cmd = engine_cmd->add_subcommand( "install", "Install " + engine_name + " engine"); install_cmd->add_option("-v, --version", version, "Engine version. Default will be latest"); - install_cmd->callback([&engine_name, &version] { + install_cmd->callback([engine_name, &version] { commands::EngineInitCmd eic(engine_name, version); eic.Exec(); }); diff --git a/engine/controllers/command_line_parser.h b/engine/controllers/command_line_parser.h index e48ed31b0..b6695346e 100644 --- a/engine/controllers/command_line_parser.h +++ b/engine/controllers/command_line_parser.h @@ -9,7 +9,8 @@ class CommandLineParser { bool SetupCommand(int argc, char** argv); private: - void EngineInstall(CLI::App* parent, const std::string& engine_name); + void EngineInstall(CLI::App* parent, const std::string& engine_name, + std::string& version); CLI::App app_; }; \ No newline at end of file From ba6816f4188f43f7090e25041aa0993c88d70bab Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 <35255081+nguyenhoangthuan99@users.noreply.github.com> Date: Thu, 29 Aug 2024 07:44:34 +0700 Subject: [PATCH 12/16] feat: models get command (#1035) --- engine/.gitignore | 3 +- engine/commands/model_get_cmd.cc | 135 ++++++++++++++++++++++ engine/commands/model_get_cmd.h | 15 +++ engine/controllers/command_line_parser.cc | 10 ++ engine/controllers/models.cc | 66 +++++++++++ engine/controllers/models.h | 3 + 6 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 engine/commands/model_get_cmd.cc create mode 100644 engine/commands/model_get_cmd.h diff --git a/engine/.gitignore b/engine/.gitignore index 10d117410..93c893e48 100644 --- a/engine/.gitignore +++ b/engine/.gitignore @@ -563,4 +563,5 @@ build build-deps .DS_Store -uploads/** \ No newline at end of file +uploads/** +CMakePresets.json \ No newline at end of file diff --git a/engine/commands/model_get_cmd.cc b/engine/commands/model_get_cmd.cc new file mode 100644 index 000000000..b9f1c5d52 --- /dev/null +++ b/engine/commands/model_get_cmd.cc @@ -0,0 +1,135 @@ +#include "model_get_cmd.h" +#include 
+#include +#include +#include "config/yaml_config.h" +#include "trantor/utils/Logger.h" +#include "utils/cortex_utils.h" + +namespace commands { +ModelGetCmd::ModelGetCmd(std::string model_handle) + : model_handle_(std::move(model_handle)) {} + +void ModelGetCmd::Exec() { + if (std::filesystem::exists(cortex_utils::models_folder) && + std::filesystem::is_directory(cortex_utils::models_folder)) { + bool found_model = false; + // Iterate through directory + for (const auto& entry : + std::filesystem::directory_iterator(cortex_utils::models_folder)) { + if (entry.is_regular_file() && entry.path().stem() == model_handle_ && + entry.path().extension() == ".yaml") { + try { + config::YamlHandler handler; + handler.ModelConfigFromFile(entry.path().string()); + const auto& model_config = handler.GetModelConfig(); + std::cout << "ModelConfig Details:\n"; + std::cout << "-------------------\n"; + + // Print non-null strings + if (!model_config.id.empty()) + std::cout << "id: " << model_config.id << "\n"; + if (!model_config.name.empty()) + std::cout << "name: " << model_config.name << "\n"; + if (!model_config.model.empty()) + std::cout << "model: " << model_config.model << "\n"; + if (!model_config.version.empty()) + std::cout << "version: " << model_config.version << "\n"; + + // Print non-empty vectors + if (!model_config.stop.empty()) { + std::cout << "stop: ["; + for (size_t i = 0; i < model_config.stop.size(); ++i) { + std::cout << model_config.stop[i]; + if (i < model_config.stop.size() - 1) + std::cout << ", "; + } + std::cout << "]\n"; + } + // Print valid numbers + if (!std::isnan(static_cast(model_config.top_p))) + std::cout << "top_p: " << model_config.top_p << "\n"; + if (!std::isnan(static_cast(model_config.temperature))) + std::cout << "temperature: " << model_config.temperature << "\n"; + if (!std::isnan(static_cast(model_config.frequency_penalty))) + std::cout << "frequency_penalty: " << model_config.frequency_penalty + << "\n"; + if (!std::isnan(static_cast(model_config.presence_penalty))) + std::cout << "presence_penalty: " << model_config.presence_penalty + << "\n"; + if (!std::isnan(static_cast(model_config.max_tokens))) + std::cout << "max_tokens: " << model_config.max_tokens << "\n"; + if (!std::isnan(static_cast(model_config.stream))) + std::cout << "stream: " << std::boolalpha << model_config.stream + << "\n"; + if (!std::isnan(static_cast(model_config.ngl))) + std::cout << "ngl: " << model_config.ngl << "\n"; + if (!std::isnan(static_cast(model_config.ctx_len))) + std::cout << "ctx_len: " << model_config.ctx_len << "\n"; + + // Print non-null strings + if (!model_config.engine.empty()) + std::cout << "engine: " << model_config.engine << "\n"; + if (!model_config.prompt_template.empty()) + std::cout << "prompt_template: " << model_config.prompt_template + << "\n"; + if (!model_config.system_template.empty()) + std::cout << "system_template: " << model_config.system_template + << "\n"; + if (!model_config.user_template.empty()) + std::cout << "user_template: " << model_config.user_template + << "\n"; + if (!model_config.ai_template.empty()) + std::cout << "ai_template: " << model_config.ai_template << "\n"; + if (!model_config.os.empty()) + std::cout << "os: " << model_config.os << "\n"; + if (!model_config.gpu_arch.empty()) + std::cout << "gpu_arch: " << model_config.gpu_arch << "\n"; + if (!model_config.quantization_method.empty()) + std::cout << "quantization_method: " + << model_config.quantization_method << "\n"; + if (!model_config.precision.empty()) + std::cout << 
"precision: " << model_config.precision << "\n"; + + if (!std::isnan(static_cast(model_config.tp))) + std::cout << "tp: " << model_config.tp << "\n"; + + // Print non-null strings + if (!model_config.trtllm_version.empty()) + std::cout << "trtllm_version: " << model_config.trtllm_version + << "\n"; + if (!std::isnan(static_cast(model_config.text_model))) + std::cout << "text_model: " << std::boolalpha + << model_config.text_model << "\n"; + + // Print non-empty vectors + if (!model_config.files.empty()) { + std::cout << "files: ["; + for (size_t i = 0; i < model_config.files.size(); ++i) { + std::cout << model_config.files[i]; + if (i < model_config.files.size() - 1) + std::cout << ", "; + } + std::cout << "]\n"; + } + + // Print valid size_t number + if (model_config.created != 0) + std::cout << "created: " << model_config.created << "\n"; + + if (!model_config.object.empty()) + std::cout << "object: " << model_config.object << "\n"; + if (!model_config.owned_by.empty()) + std::cout << "owned_by: " << model_config.owned_by << "\n"; + + found_model = true; + break; + } catch (const std::exception& e) { + LOG_ERROR << "Error reading yaml file '" << entry.path().string() + << "': " << e.what(); + } + } + } + } +} +}; // namespace commands \ No newline at end of file diff --git a/engine/commands/model_get_cmd.h b/engine/commands/model_get_cmd.h new file mode 100644 index 000000000..8de48e23d --- /dev/null +++ b/engine/commands/model_get_cmd.h @@ -0,0 +1,15 @@ +#pragma once + +#include // For std::isnan +#include +namespace commands { + +class ModelGetCmd { + public: + ModelGetCmd(std::string model_handle); + void Exec(); + + private: + std::string model_handle_; +}; +} // namespace commands \ No newline at end of file diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index 2c5f79c84..42a5f8731 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -1,6 +1,8 @@ #include "command_line_parser.h" #include "commands/engine_init_cmd.h" #include "commands/model_list_cmd.h" +#include "commands/model_get_cmd.h" + #include "commands/model_pull_cmd.h" #include "commands/start_model_cmd.h" #include "commands/stop_model_cmd.h" @@ -51,6 +53,14 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { command.Exec(); }); + auto get_models_cmd = + models_cmd->add_subcommand("get", "Get info of {model_id} locally"); + get_models_cmd->add_option("model_id", model_id, ""); + get_models_cmd->callback([&model_id](){ + commands::ModelGetCmd command(model_id); + command.Exec(); + }); + auto model_pull_cmd = app_.add_subcommand("pull", "Download a model from a registry. 
Working with " diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index e445ff90a..52a8bff28 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -101,4 +101,70 @@ void Models::ListModel( auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); resp->setStatusCode(k200OK); callback(resp); +} + +void Models::GetModel( + const HttpRequestPtr& req, + std::function&& callback) const { + if (!http_util::HasFieldInReq(req, callback, "modelId")) { + return; + } + auto model_handle = (*(req->getJsonObject())).get("modelId", "").asString(); + LOG_DEBUG << "GetModel, Model handle: " << model_handle; + Json::Value ret; + ret["object"] = "list"; + Json::Value data(Json::arrayValue); + if (std::filesystem::exists(cortex_utils::models_folder) && + std::filesystem::is_directory(cortex_utils::models_folder)) { + // Iterate through directory + for (const auto& entry : + std::filesystem::directory_iterator(cortex_utils::models_folder)) { + if (entry.is_regular_file() && entry.path().extension() == ".yaml" && + entry.path().stem() == model_handle) { + try { + config::YamlHandler handler; + handler.ModelConfigFromFile(entry.path().string()); + auto const& model_config = handler.GetModelConfig(); + Json::Value obj; + obj["name"] = model_config.name; + obj["model"] = model_config.model; + obj["version"] = model_config.version; + Json::Value stop_array(Json::arrayValue); + for (const std::string& stop : model_config.stop) + stop_array.append(stop); + obj["stop"] = stop_array; + obj["top_p"] = model_config.top_p; + obj["temperature"] = model_config.temperature; + obj["presence_penalty"] = model_config.presence_penalty; + obj["max_tokens"] = model_config.max_tokens; + obj["stream"] = model_config.stream; + obj["ngl"] = model_config.ngl; + obj["ctx_len"] = model_config.ctx_len; + obj["engine"] = model_config.engine; + obj["prompt_template"] = model_config.prompt_template; + + Json::Value files_array(Json::arrayValue); + for (const std::string& file : model_config.files) + files_array.append(file); + obj["files"] = files_array; + obj["id"] = model_config.id; + obj["created"] = static_cast(model_config.created); + obj["object"] = model_config.object; + obj["owned_by"] = model_config.owned_by; + if (model_config.engine == "cortex.tensorrt-llm") { + obj["trtllm_version"] = model_config.trtllm_version; + } + data.append(std::move(obj)); + } catch (const std::exception& e) { + LOG_ERROR << "Error reading yaml file '" << entry.path().string() + << "': " << e.what(); + } + } + } + } + ret["data"] = data; + ret["result"] = "OK"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); } \ No newline at end of file diff --git a/engine/controllers/models.h b/engine/controllers/models.h index d8f9415e4..789ce1398 100644 --- a/engine/controllers/models.h +++ b/engine/controllers/models.h @@ -14,10 +14,13 @@ class Models : public drogon::HttpController { METHOD_LIST_BEGIN METHOD_ADD(Models::PullModel, "/pull", Post); METHOD_ADD(Models::ListModel, "/list", Get); + METHOD_ADD(Models::GetModel, "/get", Post); METHOD_LIST_END void PullModel(const HttpRequestPtr& req, std::function&& callback) const; void ListModel(const HttpRequestPtr& req, std::function&& callback) const; + void GetModel(const HttpRequestPtr& req, + std::function&& callback) const; }; \ No newline at end of file From 05b4b2c5b35085425cae2d53f6e02724c716f3cd Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 
<35255081+nguyenhoangthuan99@users.noreply.github.com> Date: Thu, 29 Aug 2024 13:54:44 +0700 Subject: [PATCH 13/16] Feat background process server (#1043) --- engine/CMakeLists.txt | 5 +- engine/commands/model_get_cmd.cc | 6 + engine/commands/model_get_cmd.h | 2 + engine/controllers/command_line_parser.cc | 2 +- engine/main.cc | 160 +++++++++++++++------- engine/test/components/CMakeLists.txt | 2 +- 6 files changed, 122 insertions(+), 55 deletions(-) diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 7ba095d6b..2ac1947d4 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -81,7 +81,10 @@ endif() add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}") -# add_subdirectory(test) +option(CMAKE_BUILD_TEST "Enable testing" OFF) +if(CMAKE_BUILD_TEST) + add_subdirectory(test) +endif() find_package(jsoncpp CONFIG REQUIRED) find_package(Drogon CONFIG REQUIRED) diff --git a/engine/commands/model_get_cmd.cc b/engine/commands/model_get_cmd.cc index b9f1c5d52..82691ea32 100644 --- a/engine/commands/model_get_cmd.cc +++ b/engine/commands/model_get_cmd.cc @@ -7,6 +7,7 @@ #include "utils/cortex_utils.h" namespace commands { + ModelGetCmd::ModelGetCmd(std::string model_handle) : model_handle_(std::move(model_handle)) {} @@ -17,6 +18,7 @@ void ModelGetCmd::Exec() { // Iterate through directory for (const auto& entry : std::filesystem::directory_iterator(cortex_utils::models_folder)) { + if (entry.is_regular_file() && entry.path().stem() == model_handle_ && entry.path().extension() == ".yaml") { try { @@ -60,6 +62,7 @@ void ModelGetCmd::Exec() { if (!std::isnan(static_cast(model_config.max_tokens))) std::cout << "max_tokens: " << model_config.max_tokens << "\n"; if (!std::isnan(static_cast(model_config.stream))) + std::cout << "stream: " << std::boolalpha << model_config.stream << "\n"; if (!std::isnan(static_cast(model_config.ngl))) @@ -71,6 +74,7 @@ void ModelGetCmd::Exec() { if (!model_config.engine.empty()) std::cout << "engine: " << model_config.engine << "\n"; if (!model_config.prompt_template.empty()) + std::cout << "prompt_template: " << model_config.prompt_template << "\n"; if (!model_config.system_template.empty()) @@ -86,6 +90,7 @@ void ModelGetCmd::Exec() { if (!model_config.gpu_arch.empty()) std::cout << "gpu_arch: " << model_config.gpu_arch << "\n"; if (!model_config.quantization_method.empty()) + std::cout << "quantization_method: " << model_config.quantization_method << "\n"; if (!model_config.precision.empty()) @@ -96,6 +101,7 @@ void ModelGetCmd::Exec() { // Print non-null strings if (!model_config.trtllm_version.empty()) + std::cout << "trtllm_version: " << model_config.trtllm_version << "\n"; if (!std::isnan(static_cast(model_config.text_model))) diff --git a/engine/commands/model_get_cmd.h b/engine/commands/model_get_cmd.h index 8de48e23d..9bd9d2213 100644 --- a/engine/commands/model_get_cmd.h +++ b/engine/commands/model_get_cmd.h @@ -1,11 +1,13 @@ #pragma once + #include // For std::isnan #include namespace commands { class ModelGetCmd { public: + ModelGetCmd(std::string model_handle); void Exec(); diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index 42a5f8731..d4068acb9 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -2,7 +2,6 @@ #include "commands/engine_init_cmd.h" #include "commands/model_list_cmd.h" #include "commands/model_get_cmd.h" - #include "commands/model_pull_cmd.h" #include "commands/start_model_cmd.h" #include 
"commands/stop_model_cmd.h" @@ -140,6 +139,7 @@ void CommandLineParser::EngineInstall(CLI::App* parent, "install", "Install " + engine_name + " engine"); install_cmd->add_option("-v, --version", version, "Engine version. Default will be latest"); + install_cmd->callback([engine_name, &version] { commands::EngineInitCmd eic(engine_name, version); eic.Exec(); diff --git a/engine/main.cc b/engine/main.cc index 143cb94e6..75e0881f6 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -10,8 +10,10 @@ #if defined(__APPLE__) && defined(__MACH__) #include // for dirname() #include +#include #elif defined(__linux__) #include // for dirname() +#include #include // for readlink() #elif defined(_WIN32) #include @@ -20,8 +22,104 @@ #error "Unsupported platform!" #endif + +void RunServer(){ + // Create logs/ folder and setup log to file + std::filesystem::create_directory(cortex_utils::logs_folder); + trantor::AsyncFileLogger asyncFileLogger; + asyncFileLogger.setFileName(cortex_utils::logs_base_name); + asyncFileLogger.startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + asyncFileLogger.output(msg, len); + }, + [&]() { asyncFileLogger.flush(); }); + asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); + // Number of cortex.cpp threads + // if (argc > 1) { + // thread_num = std::atoi(argv[1]); + // } + + // // Check for host argument + // if (argc > 2) { + // host = argv[2]; + // } + + // // Check for port argument + // if (argc > 3) { + // port = std::atoi(argv[3]); // Convert string argument to int + // } + int thread_num = 1; + std::string host = "127.0.0.1"; + int port = 3928; + + int logical_cores = std::thread::hardware_concurrency(); + int drogon_thread_num = std::max(thread_num, logical_cores); + // cortex_utils::nitro_logo(); +#ifdef CORTEX_CPP_VERSION + LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION; +#else + LOG_INFO << "cortex.cpp version: undefined"; +#endif + + LOG_INFO << "Server started, listening at: " << host << ":" << port; + LOG_INFO << "Please load your model"; + drogon::app().addListener(host, port); + drogon::app().setThreadNum(drogon_thread_num); + LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); + + drogon::app().run(); + // return 0; +} + +void ForkProcess() { +#if defined(_WIN32) || defined(_WIN64) + // Windows-specific code to create a new process + STARTUPINFO si; + PROCESS_INFORMATION pi; + + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + ZeroMemory(&pi, sizeof(pi)); + std::string cmds = cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server"; + // Create child process + if (!CreateProcess( + NULL, // No module name (use command line) + const_cast(cmds.c_str()), // Command line (replace with your actual executable) + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + FALSE, // Set handle inheritance to FALSE + 0, // No creation flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &si, // Pointer to STARTUPINFO structure + &pi)) // Pointer to PROCESS_INFORMATION structure + { + std::cout << "Could not start server: " << GetLastError() << std::endl; + } else { + std::cout << "Server started" << std::endl; + } + +#else + // Unix-like system-specific code to fork a child process + pid_t pid = fork(); + + if (pid < 0) { + // Fork failed + std::cerr << "Could not start server: " << std::endl; + return; + } else if (pid == 0) { + // Child process + RunServer(); + } else { + // Parent process + std::cout 
<< "Server started" << std::endl; + } +#endif +} + int main(int argc, char* argv[]) { - + // Check if this process is for python execution if (argc > 1) { if (strcmp(argv[1], "--run_python_file") == 0) { @@ -44,58 +142,16 @@ int main(int argc, char* argv[]) { } if (argc > 1) { - CommandLineParser clp; - clp.SetupCommand(argc, argv); - return 0; - } - - // Create logs/ folder and setup log to file - std::filesystem::create_directory(cortex_utils::logs_folder); - trantor::AsyncFileLogger asyncFileLogger; - asyncFileLogger.setFileName(cortex_utils::logs_base_name); - asyncFileLogger.startLogging(); - trantor::Logger::setOutputFunction( - [&](const char* msg, const uint64_t len) { - asyncFileLogger.output(msg, len); - }, - [&]() { asyncFileLogger.flush(); }); - asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); - - int thread_num = 1; - std::string host = "127.0.0.1"; - int port = 3928; - - // Number of cortex.cpp threads - if (argc > 1) { - thread_num = std::atoi(argv[1]); - } - - // Check for host argument - if (argc > 2) { - host = argv[2]; - } - - // Check for port argument - if (argc > 3) { - port = std::atoi(argv[3]); // Convert string argument to int + if (strcmp(argv[1], "--start-server") == 0) { + RunServer(); + return 0; + } else { + CommandLineParser clp; + clp.SetupCommand(argc, argv); + return 0; + } } - int logical_cores = std::thread::hardware_concurrency(); - int drogon_thread_num = std::max(thread_num, logical_cores); - // cortex_utils::nitro_logo(); -#ifdef CORTEX_CPP_VERSION - LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION; -#else - LOG_INFO << "cortex.cpp version: undefined"; -#endif - - LOG_INFO << "Server started, listening at: " << host << ":" << port; - LOG_INFO << "Please load your model"; - drogon::app().addListener(host, port); - drogon::app().setThreadNum(drogon_thread_num); - LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); - - drogon::app().run(); - + ForkProcess(); return 0; } diff --git a/engine/test/components/CMakeLists.txt b/engine/test/components/CMakeLists.txt index 71a44012b..942c6a92a 100644 --- a/engine/test/components/CMakeLists.txt +++ b/engine/test/components/CMakeLists.txt @@ -8,7 +8,7 @@ add_executable(${PROJECT_NAME} ${SRCS}) find_package(Drogon CONFIG REQUIRED) find_package(GTest CONFIG REQUIRED) -target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gmock +target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gtest_main ${CMAKE_THREAD_LIBS_INIT}) target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../) From c693e55b232146737b094a73368b527229dc1703 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 29 Aug 2024 15:18:40 +0700 Subject: [PATCH 14/16] feat: run command (#1045) --- engine/commands/chat_cmd.cc | 2 +- engine/commands/cmd_info.cc | 54 +++++++++++ engine/commands/cmd_info.h | 14 +++ engine/commands/engine_init_cmd.cc | 57 ++++++----- engine/commands/engine_init_cmd.h | 2 +- engine/commands/model_pull_cmd.cc | 10 +- engine/commands/model_pull_cmd.h | 7 +- ...{start_model_cmd.cc => model_start_cmd.cc} | 12 ++- .../{start_model_cmd.h => model_start_cmd.h} | 6 +- engine/commands/run_cmd.cc | 97 +++++++++++++++++++ engine/commands/run_cmd.h | 22 +++++ engine/controllers/command_line_parser.cc | 41 +++++--- engine/controllers/engines.cc | 2 +- engine/main.cc | 2 - engine/services/download_service.cc | 8 +- engine/services/download_service.h | 2 +- engine/utils/cortexso_parser.h | 2 +- 
engine/utils/model_callback_utils.h | 9 +- 18 files changed, 282 insertions(+), 67 deletions(-) create mode 100644 engine/commands/cmd_info.cc create mode 100644 engine/commands/cmd_info.h rename engine/commands/{start_model_cmd.cc => model_start_cmd.cc} (84%) rename engine/commands/{start_model_cmd.h => model_start_cmd.h} (64%) create mode 100644 engine/commands/run_cmd.cc create mode 100644 engine/commands/run_cmd.h diff --git a/engine/commands/chat_cmd.cc b/engine/commands/chat_cmd.cc index 185dd60fe..2c00053c9 100644 --- a/engine/commands/chat_cmd.cc +++ b/engine/commands/chat_cmd.cc @@ -57,7 +57,7 @@ void ChatCmd::Exec(std::string msg) { } } // Some instruction for user here - std::cout << "Inorder to exit, type exit()" << std::endl; + std::cout << "Inorder to exit, type `exit()`" << std::endl; // Model is loaded, start to chat { while (true) { diff --git a/engine/commands/cmd_info.cc b/engine/commands/cmd_info.cc new file mode 100644 index 000000000..bf13c6bc7 --- /dev/null +++ b/engine/commands/cmd_info.cc @@ -0,0 +1,54 @@ +#include "cmd_info.h" +#include +#include "trantor/utils/Logger.h" + +namespace commands { +namespace { +constexpr const char* kDelimiter = ":"; + +std::vector split(std::string& s, const std::string& delimiter) { + std::vector tokens; + size_t pos = 0; + std::string token; + while ((pos = s.find(delimiter)) != std::string::npos) { + token = s.substr(0, pos); + tokens.push_back(token); + s.erase(0, pos + delimiter.length()); + } + tokens.push_back(s); + + return tokens; +} +} // namespace + +CmdInfo::CmdInfo(std::string model_id) { + Parse(std::move(model_id)); +} + +void CmdInfo::Parse(std::string model_id) { + if (model_id.find(kDelimiter) == std::string::npos) { + engine_name = "cortex.llamacpp"; + model_name = std::move(model_id); + branch = "main"; + } else { + auto res = split(model_id, kDelimiter); + if (res.size() != 2) { + LOG_ERROR << "model_id does not valid"; + return; + } else { + model_name = std::move(res[0]); + branch = std::move(res[1]); + if (branch.find("onnx") != std::string::npos) { + engine_name = "cortex.onnx"; + } else if (branch.find("tensorrt") != std::string::npos) { + engine_name = "cortex.tensorrt-llm"; + } else if (branch.find("gguf") != std::string::npos) { + engine_name = "cortex.llamacpp"; + } else { + LOG_ERROR << "Not a valid branch model_name " << branch; + } + } + } +} + +} // namespace commands \ No newline at end of file diff --git a/engine/commands/cmd_info.h b/engine/commands/cmd_info.h new file mode 100644 index 000000000..460990757 --- /dev/null +++ b/engine/commands/cmd_info.h @@ -0,0 +1,14 @@ +#pragma once +#include +namespace commands { +struct CmdInfo { + explicit CmdInfo(std::string model_id); + + std::string engine_name; + std::string model_name; + std::string branch; + + private: + void Parse(std::string model_id); +}; +} // namespace commands \ No newline at end of file diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc index b4f8fe064..5a1165e23 100644 --- a/engine/commands/engine_init_cmd.cc +++ b/engine/commands/engine_init_cmd.cc @@ -14,10 +14,10 @@ namespace commands { EngineInitCmd::EngineInitCmd(std::string engineName, std::string version) : engineName_(std::move(engineName)), version_(std::move(version)) {} -void EngineInitCmd::Exec() const { +bool EngineInitCmd::Exec() const { if (engineName_.empty()) { LOG_ERROR << "Engine name is required"; - return; + return false; } // Check if the architecture and OS are supported @@ -26,7 +26,7 @@ void EngineInitCmd::Exec() const { 
system_info.os == system_info_utils::kUnsupported) { LOG_ERROR << "Unsupported OS or architecture: " << system_info.os << ", " << system_info.arch; - return; + return false; } LOG_INFO << "OS: " << system_info.os << ", Arch: " << system_info.arch; @@ -34,7 +34,7 @@ void EngineInitCmd::Exec() const { if (std::find(supportedEngines_.begin(), supportedEngines_.end(), engineName_) == supportedEngines_.end()) { LOG_ERROR << "Engine not supported"; - return; + return false; } constexpr auto gitHubHost = "https://api.github.com"; @@ -78,7 +78,7 @@ void EngineInitCmd::Exec() const { LOG_INFO << "Matched variant: " << matched_variant; if (matched_variant.empty()) { LOG_ERROR << "No variant found for " << os_arch; - return; + return false; } for (auto& asset : assets) { @@ -103,36 +103,45 @@ void EngineInitCmd::Exec() const { .path = path, }}}; - DownloadService().AddDownloadTask( - downloadTask, [](const std::string& absolute_path) { - // try to unzip the downloaded file - std::filesystem::path downloadedEnginePath{absolute_path}; - LOG_INFO << "Downloaded engine path: " - << downloadedEnginePath.string(); - - archive_utils::ExtractArchive( - downloadedEnginePath.string(), - downloadedEnginePath.parent_path() - .parent_path() - .string()); - - // remove the downloaded file - std::filesystem::remove(absolute_path); - LOG_INFO << "Finished!"; - }); - - return; + DownloadService().AddDownloadTask(downloadTask, [](const std::string& + absolute_path, + bool unused) { + // try to unzip the downloaded file + std::filesystem::path downloadedEnginePath{absolute_path}; + LOG_INFO << "Downloaded engine path: " + << downloadedEnginePath.string(); + + archive_utils::ExtractArchive( + downloadedEnginePath.string(), + downloadedEnginePath.parent_path().parent_path().string()); + + // remove the downloaded file + // TODO(any) Could not delete file on Windows because it is currently hold by httplib(?) 
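
// Aside: one possible workaround for the deletion TODO above is to retry
// the remove with a short backoff until the downloader's handle is closed.
// A minimal sketch; RemoveWithRetry is hypothetical and not part of this
// patch:

#include <chrono>
#include <filesystem>
#include <system_error>
#include <thread>

inline bool RemoveWithRetry(const std::filesystem::path& p,
                            int attempts = 5) {
  for (int i = 0; i < attempts; ++i) {
    std::error_code ec;
    std::filesystem::remove(p, ec);  // non-throwing overload
    if (!ec) return true;            // removed, or file was already gone
    std::this_thread::sleep_for(std::chrono::milliseconds(200));
  }
  return false;
}
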
+ // Not sure about other platforms + try { + std::filesystem::remove(absolute_path); + } catch (const std::exception& e) { + LOG_ERROR << "Could not delete file: " << e.what(); + } + LOG_INFO << "Finished!"; + }); + + return true; } } } catch (const json::parse_error& e) { std::cerr << "JSON parse error: " << e.what() << std::endl; + return false; } } else { LOG_ERROR << "HTTP error: " << res->status; + return false; } } else { auto err = res.error(); LOG_ERROR << "HTTP error: " << httplib::to_string(err); + return false; } + return true; } }; // namespace commands diff --git a/engine/commands/engine_init_cmd.h b/engine/commands/engine_init_cmd.h index dc75d5cf6..8de74034e 100644 --- a/engine/commands/engine_init_cmd.h +++ b/engine/commands/engine_init_cmd.h @@ -9,7 +9,7 @@ class EngineInitCmd { public: EngineInitCmd(std::string engineName, std::string version); - void Exec() const; + bool Exec() const; private: std::string engineName_; diff --git a/engine/commands/model_pull_cmd.cc b/engine/commands/model_pull_cmd.cc index 9dcd8c4ef..f8e3a7947 100644 --- a/engine/commands/model_pull_cmd.cc +++ b/engine/commands/model_pull_cmd.cc @@ -6,18 +6,20 @@ #include "utils/model_callback_utils.h" namespace commands { -ModelPullCmd::ModelPullCmd(std::string modelHandle) - : modelHandle_(std::move(modelHandle)) {} +ModelPullCmd::ModelPullCmd(std::string model_handle, std::string branch) + : model_handle_(std::move(model_handle)), branch_(std::move(branch)) {} -void ModelPullCmd::Exec() { - auto downloadTask = cortexso_parser::getDownloadTask(modelHandle_); +bool ModelPullCmd::Exec() { + auto downloadTask = cortexso_parser::getDownloadTask(model_handle_, branch_); if (downloadTask.has_value()) { DownloadService downloadService; downloadService.AddDownloadTask(downloadTask.value(), model_callback_utils::DownloadModelCb); std::cout << "Download finished" << std::endl; + return true; } else { std::cout << "Model not found" << std::endl; + return false; } } diff --git a/engine/commands/model_pull_cmd.h b/engine/commands/model_pull_cmd.h index 2c5f658f2..da5713bdf 100644 --- a/engine/commands/model_pull_cmd.h +++ b/engine/commands/model_pull_cmd.h @@ -6,10 +6,11 @@ namespace commands { class ModelPullCmd { public: - ModelPullCmd(std::string modelHandle); - void Exec(); +explicit ModelPullCmd(std::string model_handle, std::string branch); + bool Exec(); private: - std::string modelHandle_; + std::string model_handle_; + std::string branch_; }; } // namespace commands \ No newline at end of file diff --git a/engine/commands/start_model_cmd.cc b/engine/commands/model_start_cmd.cc similarity index 84% rename from engine/commands/start_model_cmd.cc rename to engine/commands/model_start_cmd.cc index 341ba2f9d..0342c3d35 100644 --- a/engine/commands/start_model_cmd.cc +++ b/engine/commands/model_start_cmd.cc @@ -1,14 +1,14 @@ -#include "start_model_cmd.h" +#include "model_start_cmd.h" #include "httplib.h" #include "nlohmann/json.hpp" #include "trantor/utils/Logger.h" namespace commands { -StartModelCmd::StartModelCmd(std::string host, int port, +ModelStartCmd::ModelStartCmd(std::string host, int port, const config::ModelConfig& mc) : host_(std::move(host)), port_(port), mc_(mc) {} -void StartModelCmd::Exec() { +bool ModelStartCmd::Exec() { httplib::Client cli(host_ + ":" + std::to_string(port_)); nlohmann::json json_data; if (mc_.files.size() > 0) { @@ -16,7 +16,7 @@ void StartModelCmd::Exec() { json_data["model_path"] = mc_.files[0]; } else { LOG_WARN << "model_path is empty"; - return; + return false; } 
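
// Usage sketch for ModelStartCmd, mirroring how command_line_parser.cc
// wires it up later in this series; StartTinyllama is a hypothetical demo
// function, and the host, port, and yaml path are illustrative assumptions:

#include "commands/model_start_cmd.h"
#include "config/yaml_config.h"
#include "utils/cortex_utils.h"

inline bool StartTinyllama() {
  config::YamlHandler yaml_handler;
  yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() +
                                   "/models/tinyllama/model.yml");
  commands::ModelStartCmd msc("127.0.0.1", 3928,
                              yaml_handler.GetModelConfig());
  return msc.Exec();  // false when model_path is empty or the POST fails
}
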
json_data["model"] = mc_.name; json_data["system_prompt"] = mc_.system_template; @@ -27,7 +27,7 @@ void StartModelCmd::Exec() { json_data["engine"] = mc_.engine; auto data_str = json_data.dump(); - + cli.set_read_timeout(std::chrono::seconds(60)); auto res = cli.Post("/inferences/server/loadmodel", httplib::Headers(), data_str.data(), data_str.size(), "application/json"); if (res) { @@ -37,7 +37,9 @@ void StartModelCmd::Exec() { } else { auto err = res.error(); LOG_WARN << "HTTP error: " << httplib::to_string(err); + return false; } + return true; } }; // namespace commands \ No newline at end of file diff --git a/engine/commands/start_model_cmd.h b/engine/commands/model_start_cmd.h similarity index 64% rename from engine/commands/start_model_cmd.h rename to engine/commands/model_start_cmd.h index 27cfc59e6..809f71c83 100644 --- a/engine/commands/start_model_cmd.h +++ b/engine/commands/model_start_cmd.h @@ -5,10 +5,10 @@ namespace commands { -class StartModelCmd{ +class ModelStartCmd{ public: - StartModelCmd(std::string host, int port, const config::ModelConfig& mc); - void Exec(); + explicit ModelStartCmd(std::string host, int port, const config::ModelConfig& mc); + bool Exec(); private: std::string host_; diff --git a/engine/commands/run_cmd.cc b/engine/commands/run_cmd.cc new file mode 100644 index 000000000..1c7e5c7e6 --- /dev/null +++ b/engine/commands/run_cmd.cc @@ -0,0 +1,97 @@ +#include "run_cmd.h" +#include "chat_cmd.h" +#include "cmd_info.h" +#include "config/yaml_config.h" +#include "engine_init_cmd.h" +#include "httplib.h" +#include "model_pull_cmd.h" +#include "model_start_cmd.h" +#include "trantor/utils/Logger.h" +#include "utils/cortex_utils.h" + +namespace commands { + +RunCmd::RunCmd(std::string host, int port, std::string model_id) + : host_(std::move(host)), port_(port), model_id_(std::move(model_id)) {} + +void RunCmd::Exec() { + auto address = host_ + ":" + std::to_string(port_); + CmdInfo ci(model_id_); + std::string model_file = + ci.branch == "main" ? ci.model_name : ci.model_name + "-" + ci.branch; + // TODO should we clean all resource if something fails? + // Check if model existed. If not, download it + { + if (!IsModelExisted(model_file)) { + ModelPullCmd model_pull_cmd(ci.model_name, ci.branch); + if (!model_pull_cmd.Exec()) { + return; + } + } + } + + // Check if engine existed. 
If not, download it + { + if (!IsEngineExisted(ci.engine_name)) { + EngineInitCmd eic(ci.engine_name, ""); + if (!eic.Exec()) + return; + } + } + + // Start model + config::YamlHandler yaml_handler; + yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() + "/models/" + + model_file + ".yaml"); + { + ModelStartCmd msc(host_, port_, yaml_handler.GetModelConfig()); + if (!msc.Exec()) { + return; + } + } + + // Chat + { + ChatCmd cc(host_, port_, yaml_handler.GetModelConfig()); + cc.Exec(""); + } +} + +bool RunCmd::IsModelExisted(const std::string& model_id) { + if (std::filesystem::exists(cortex_utils::GetCurrentPath() + "/" + + cortex_utils::models_folder) && + std::filesystem::is_directory(cortex_utils::GetCurrentPath() + "/" + + cortex_utils::models_folder)) { + // Iterate through directory + for (const auto& entry : std::filesystem::directory_iterator( + cortex_utils::GetCurrentPath() + "/" + + cortex_utils::models_folder)) { + if (entry.is_regular_file() && entry.path().extension() == ".yaml") { + try { + config::YamlHandler handler; + handler.ModelConfigFromFile(entry.path().string()); + std::cout << entry.path().stem().string() << std::endl; + if (entry.path().stem().string() == model_id) { + return true; + } + } catch (const std::exception& e) { + LOG_ERROR << "Error reading yaml file '" << entry.path().string() + << "': " << e.what(); + } + } + } + } + return false; +} + +bool RunCmd::IsEngineExisted(const std::string& e) { + if (std::filesystem::exists(cortex_utils::GetCurrentPath() + "/" + + "engines") && + std::filesystem::exists(cortex_utils::GetCurrentPath() + "/" + + "engines/" + e)) { + return true; + } + return false; +} + +}; // namespace commands \ No newline at end of file diff --git a/engine/commands/run_cmd.h b/engine/commands/run_cmd.h new file mode 100644 index 000000000..ca44b9d24 --- /dev/null +++ b/engine/commands/run_cmd.h @@ -0,0 +1,22 @@ +#pragma once +#include +#include +#include "config/model_config.h" +#include "nlohmann/json.hpp" + +namespace commands { +class RunCmd { + public: + explicit RunCmd(std::string host, int port, std::string model_id); + void Exec(); + + private: + bool IsModelExisted(const std::string& model_id); + bool IsEngineExisted(const std::string& e); + + private: + std::string host_; + int port_; + std::string model_id_; +}; +} // namespace commands \ No newline at end of file diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index d4068acb9..835445501 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -1,12 +1,14 @@ #include "command_line_parser.h" +#include "commands/chat_cmd.h" +#include "commands/cmd_info.h" #include "commands/engine_init_cmd.h" -#include "commands/model_list_cmd.h" #include "commands/model_get_cmd.h" +#include "commands/model_list_cmd.h" #include "commands/model_pull_cmd.h" -#include "commands/start_model_cmd.h" +#include "commands/model_start_cmd.h" +#include "commands/run_cmd.h" #include "commands/stop_model_cmd.h" #include "commands/stop_server_cmd.h" -#include "commands/chat_cmd.h" #include "config/yaml_config.h" #include "utils/cortex_utils.h" @@ -14,7 +16,7 @@ CommandLineParser::CommandLineParser() : app_("Cortex.cpp CLI") {} bool CommandLineParser::SetupCommand(int argc, char** argv) { std::string model_id; - + // Models group commands { auto models_cmd = @@ -27,9 +29,9 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { config::YamlHandler yaml_handler; 
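
// Worked examples for the CmdInfo parser added in cmd_info.cc above; the
// model ids are illustrative and the demo function is hypothetical:

#include "commands/cmd_info.h"

inline void CmdInfoExamples() {
  commands::CmdInfo a("tinyllama");       // name "tinyllama", branch "main", engine "cortex.llamacpp"
  commands::CmdInfo b("tinyllama:gguf");  // branch "gguf" -> engine "cortex.llamacpp"
  commands::CmdInfo c("phi3:onnx");       // branch "onnx" -> engine "cortex.onnx"
  commands::CmdInfo d("llama3:tensorrt-llm");  // branch contains "tensorrt" -> engine "cortex.tensorrt-llm"
}
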
yaml_handler.ModelConfigFromFile(cortex_utils::GetCurrentPath() + "/models/" + model_id + "/model.yml"); - commands::StartModelCmd smc("127.0.0.1", 3928, + commands::ModelStartCmd msc("127.0.0.1", 3928, yaml_handler.GetModelConfig()); - smc.Exec(); + msc.Exec(); }); auto stop_model_cmd = @@ -55,7 +57,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto get_models_cmd = models_cmd->add_subcommand("get", "Get info of {model_id} locally"); get_models_cmd->add_option("model_id", model_id, ""); - get_models_cmd->callback([&model_id](){ + get_models_cmd->callback([&model_id]() { commands::ModelGetCmd command(model_id); command.Exec(); }); @@ -66,8 +68,10 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { "HuggingFace repositories. For available models, " "please visit https://huggingface.co/cortexso"); model_pull_cmd->add_option("model_id", model_id, ""); + model_pull_cmd->callback([&model_id]() { - commands::ModelPullCmd command(model_id); + commands::CmdInfo ci(model_id); + commands::ModelPullCmd command(ci.model_name, ci.branch); command.Exec(); }); @@ -81,10 +85,9 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { { auto chat_cmd = app_.add_subcommand("chat", "Send a chat request to a model"); - + chat_cmd->add_option("model_id", model_id, ""); - chat_cmd->add_option("-m,--message", msg, - "Message to chat with model"); + chat_cmd->add_option("-m,--message", msg, "Message to chat with model"); chat_cmd->callback([&model_id, &msg] { // TODO(sang) switch to .yaml when implement model manager @@ -115,8 +118,17 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { EngineInstall(engines_cmd, "cortex.tensorrt-llm", version); } - auto run_cmd = - app_.add_subcommand("run", "Shortcut to start a model and chat"); + { + // cortex run tinyllama:gguf + auto run_cmd = + app_.add_subcommand("run", "Shortcut to start a model and chat"); + std::string model_id; + run_cmd->add_option("model_id", model_id, ""); + run_cmd->callback([&model_id] { + commands::RunCmd rc("127.0.0.1", 3928, model_id); + rc.Exec(); + }); + } auto stop_cmd = app_.add_subcommand("stop", "Stop the API server"); @@ -131,7 +143,8 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { } void CommandLineParser::EngineInstall(CLI::App* parent, - const std::string& engine_name, std::string& version) { + const std::string& engine_name, + std::string& version) { auto engine_cmd = parent->add_subcommand(engine_name, "Manage " + engine_name + " engine"); diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 12bea809d..b10a6b758 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -68,7 +68,7 @@ void Engines::InitEngine(const HttpRequestPtr& req, }}}; DownloadService().AddAsyncDownloadTask( - downloadTask, [](const std::string& absolute_path) { + downloadTask, [](const std::string& absolute_path, bool unused) { // try to unzip the downloaded file std::filesystem::path downloadedEnginePath{absolute_path}; LOG_INFO << "Downloaded engine path: " diff --git a/engine/main.cc b/engine/main.cc index 75e0881f6..193f84c60 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -22,7 +22,6 @@ #error "Unsupported platform!" 
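
// End to end, the new `run` subcommand reduces to the following usage
// sketch (hypothetical demo function; default host and port assumed):

#include "commands/run_cmd.h"

inline void RunTinyllama() {
  commands::RunCmd rc("127.0.0.1", 3928, "tinyllama:gguf");
  rc.Exec();  // pulls the model if missing, installs the engine if missing,
              // loads the model, then drops into interactive chat
}
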
#endif - void RunServer(){ // Create logs/ folder and setup log to file std::filesystem::create_directory(cortex_utils::logs_folder); @@ -119,7 +118,6 @@ void ForkProcess() { } int main(int argc, char* argv[]) { - // Check if this process is for python execution if (argc > 1) { if (strcmp(argv[1], "--run_python_file") == 0) { diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc index 4a60a42a8..97c16d650 100644 --- a/engine/services/download_service.cc +++ b/engine/services/download_service.cc @@ -72,8 +72,8 @@ void DownloadService::StartDownloadItem( outputFile.write(data, data_length); return true; }, - [&last, &outputFile, &callback, outputFilePath, this](uint64_t current, - uint64_t total) { + [&item, &last, &outputFile, &callback, outputFilePath, this]( + uint64_t current, uint64_t total) { if (current - last > kUpdateProgressThreshold) { last = current; LOG_INFO << "Downloading: " << current << " / " << total; @@ -83,7 +83,9 @@ void DownloadService::StartDownloadItem( LOG_INFO << "Done download: " << static_cast(total) / 1024 / 1024 << " MiB"; if (callback.has_value()) { - callback.value()(outputFilePath.string()); + auto need_parse_gguf = + item.path.find("cortexso") == std::string::npos; + callback.value()(outputFilePath.string(), need_parse_gguf); } return false; } diff --git a/engine/services/download_service.h b/engine/services/download_service.h index 86aefeb52..4efe653bf 100644 --- a/engine/services/download_service.h +++ b/engine/services/download_service.h @@ -41,7 +41,7 @@ class DownloadService { * * @param task */ - using DownloadItemCb = std::function; + using DownloadItemCb = std::function; void AddDownloadTask(const DownloadTask& task, std::optional callback = std::nullopt); diff --git a/engine/utils/cortexso_parser.h b/engine/utils/cortexso_parser.h index 04f6e7fa5..91efa1fff 100644 --- a/engine/utils/cortexso_parser.h +++ b/engine/utils/cortexso_parser.h @@ -45,7 +45,7 @@ inline std::optional getDownloadTask( } DownloadTask downloadTask{}; - downloadTask.id = modelId; + downloadTask.id = branch == "main" ? modelId : modelId + "-" + branch; downloadTask.type = DownloadType::Model; downloadTask.error = std::nullopt; downloadTask.items = downloadItems; diff --git a/engine/utils/model_callback_utils.h b/engine/utils/model_callback_utils.h index 753fdb205..f5504cda3 100644 --- a/engine/utils/model_callback_utils.h +++ b/engine/utils/model_callback_utils.h @@ -10,7 +10,7 @@ #include "utils/file_manager_utils.h" namespace model_callback_utils { -inline void DownloadModelCb(const std::string& path) { +inline void DownloadModelCb(const std::string& path, bool need_parse_gguf) { std::filesystem::path path_obj(path); std::string filename(path_obj.filename().string()); @@ -29,7 +29,7 @@ inline void DownloadModelCb(const std::string& path) { // currently, only handle downloaded model with only 1 .gguf file // TODO: handle multipart gguf file or different model in 1 repo. 
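
// Spelled out with its template arguments, the callback type widened in
// download_service.h above is:
//
//   using DownloadItemCb = std::function<void(const std::string& absolute_path,
//                                             bool need_parse_gguf)>;
//
// Downloads whose path contains "cortexso" pass need_parse_gguf = false,
// so the .gguf parsing below is skipped for cortexso repos, which are
// expected to ship their own model.yml.
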
else if (path_obj.extension().string().compare(".gguf") == 0) { - + if(!need_parse_gguf) return; config::GGUFHandler gguf_handler; config::YamlHandler yaml_handler; gguf_handler.Parse(path); @@ -40,10 +40,11 @@ inline void DownloadModelCb(const std::string& path) { std::string yml_path(path_obj.parent_path().parent_path().string() + "/" + model_config.id + ".yaml"); std::string yaml_path(path_obj.parent_path().string() + "/model.yml"); - if (!std::filesystem::exists(yml_path)) { // if model.yml doesn't exsited + if (!std::filesystem::exists(yml_path)) { // if model.yml doesn't exist yaml_handler.WriteYamlFile(yml_path); } - if (!std::filesystem::exists(yaml_path)) {// if .yaml doesn't exsited + if (!std::filesystem::exists( + yaml_path)) { // if .yaml doesn't exist yaml_handler.WriteYamlFile(yaml_path); } } From 8fdff72c7d7cd190aeb4bbcdced93cb130ed6693 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 29 Aug 2024 15:59:59 +0700 Subject: [PATCH 15/16] fix: handle stop server (#1048) --- engine/commands/run_cmd.cc | 1 - engine/controllers/processManager.cc | 16 +++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/engine/commands/run_cmd.cc b/engine/commands/run_cmd.cc index 1c7e5c7e6..a84393652 100644 --- a/engine/commands/run_cmd.cc +++ b/engine/commands/run_cmd.cc @@ -70,7 +70,6 @@ bool RunCmd::IsModelExisted(const std::string& model_id) { try { config::YamlHandler handler; handler.ModelConfigFromFile(entry.path().string()); - std::cout << entry.path().stem().string() << std::endl; if (entry.path().stem().string() == model_id) { return true; } diff --git a/engine/controllers/processManager.cc b/engine/controllers/processManager.cc index 2874a3860..15c213453 100644 --- a/engine/controllers/processManager.cc +++ b/engine/controllers/processManager.cc @@ -1,11 +1,17 @@ #include "processManager.h" -#include +#include "utils/cortex_utils.h" + #include +#include void processManager::destroy( - const HttpRequestPtr &req, - std::function &&callback) { + const HttpRequestPtr& req, + std::function&& callback) { + app().quit(); + Json::Value ret; + ret["message"] = "Program is exitting, goodbye!"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); LOG_INFO << "Program is exitting, goodbye!"; - exit(0); - return; }; From 45103c0c03be8403ba53dce31d201162c97a13c8 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 29 Aug 2024 01:53:11 +0700 Subject: [PATCH 16/16] feat: download cuda toolkit Signed-off-by: James --- .gitignore | 3 +- engine/commands/engine_init_cmd.cc | 60 +++++++++++++++- engine/main.cc | 98 ++++++++++++++------------- engine/services/download_service.h | 2 +- engine/utils/cuda_toolkit_utils.h | 63 +++++++++++++++++ engine/utils/file_manager_utils.h | 50 +++++++++++++- engine/utils/semantic_version_utils.h | 34 ++++++++++ engine/utils/system_info_utils.h | 35 ++++++++++ 8 files changed, 291 insertions(+), 54 deletions(-) create mode 100644 engine/utils/cuda_toolkit_utils.h create mode 100644 engine/utils/semantic_version_utils.h diff --git a/.gitignore b/.gitignore index 237fb5b33..6b785abe9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # cortex-js +.DS_Store cortex-js/cortex.db dist *.lock @@ -18,4 +19,4 @@ cortex-js/package-lock.json .vscode cortex-js/command cortex-js/src/infrastructure/commanders/test/test_data -**/vcpkg_installed \ No newline at end of file +**/vcpkg_installed diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc index 
5a1165e23..0f22bd57f 100644 --- a/engine/commands/engine_init_cmd.cc +++ b/engine/commands/engine_init_cmd.cc @@ -7,6 +7,7 @@ #include "utils/archive_utils.h" #include "utils/system_info_utils.h" // clang-format on +#include "utils/cuda_toolkit_utils.h" #include "utils/engine_matcher_utils.h" namespace commands { @@ -103,9 +104,10 @@ bool EngineInitCmd::Exec() const { .path = path, }}}; - DownloadService().AddDownloadTask(downloadTask, [](const std::string& - absolute_path, - bool unused) { + DownloadService download_service; + download_service.AddDownloadTask(downloadTask, [](const std::string& + absolute_path, + bool unused) { // try to unzip the downloaded file std::filesystem::path downloadedEnginePath{absolute_path}; LOG_INFO << "Downloaded engine path: " @@ -125,6 +127,58 @@ bool EngineInitCmd::Exec() const { } LOG_INFO << "Finished!"; }); + if (system_info.os == "mac" || engineName_ == "cortex.onnx") { + return false; + } + // download cuda toolkit + const std::string jan_host = "https://catalog.jan.ai"; + const std::string cuda_toolkit_file_name = "cuda.tar.gz"; + const std::string download_id = "cuda"; + + auto gpu_driver_version = system_info_utils::GetDriverVersion(); + + auto cuda_runtime_version = + cuda_toolkit_utils::GetCompatibleCudaToolkitVersion( + gpu_driver_version, system_info.os, engineName_); + + std::ostringstream cuda_toolkit_path; + cuda_toolkit_path << "dist/cuda-dependencies/" << 11.7 << "/" + << system_info.os << "/" + << cuda_toolkit_file_name; + + LOG_DEBUG << "Cuda toolkit download url: " << jan_host + << cuda_toolkit_path.str(); + + auto downloadCudaToolkitTask = DownloadTask{ + .id = download_id, + .type = DownloadType::CudaToolkit, + .error = std::nullopt, + .items = {DownloadItem{ + .id = download_id, + .host = jan_host, + .fileName = cuda_toolkit_file_name, + .type = DownloadType::CudaToolkit, + .path = cuda_toolkit_path.str(), + }}, + }; + + download_service.AddDownloadTask( + downloadCudaToolkitTask, + [](const std::string& absolute_path, bool unused) { + LOG_DEBUG << "Downloaded cuda path: " << absolute_path; + // try to unzip the downloaded file + std::filesystem::path downloaded_path{absolute_path}; + + archive_utils::ExtractArchive( + absolute_path, + downloaded_path.parent_path().parent_path().string()); + + try { + std::filesystem::remove(absolute_path); + } catch (std::exception& e) { + LOG_ERROR << "Error removing downloaded file: " << e.what(); + } + }); return true; } diff --git a/engine/main.cc b/engine/main.cc index 193f84c60..4b966b3f6 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -22,53 +22,53 @@ #error "Unsupported platform!" 
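
// With the values above, the CUDA dependency archive URL resolves to
// roughly (host and path are joined by the download service):
//
//   https://catalog.jan.ai/dist/cuda-dependencies/11.7/<os>/cuda.tar.gz
//
// Note that cuda_toolkit_path streams the literal 11.7 even though a
// compatible cuda_runtime_version is computed just above it.
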
#endif -void RunServer(){ +void RunServer() { // Create logs/ folder and setup log to file - std::filesystem::create_directory(cortex_utils::logs_folder); - trantor::AsyncFileLogger asyncFileLogger; - asyncFileLogger.setFileName(cortex_utils::logs_base_name); - asyncFileLogger.startLogging(); - trantor::Logger::setOutputFunction( - [&](const char* msg, const uint64_t len) { - asyncFileLogger.output(msg, len); - }, - [&]() { asyncFileLogger.flush(); }); - asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); - // Number of cortex.cpp threads - // if (argc > 1) { - // thread_num = std::atoi(argv[1]); - // } + std::filesystem::create_directory(cortex_utils::logs_folder); + trantor::AsyncFileLogger asyncFileLogger; + asyncFileLogger.setFileName(cortex_utils::logs_base_name); + asyncFileLogger.startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + asyncFileLogger.output(msg, len); + }, + [&]() { asyncFileLogger.flush(); }); + asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); + // Number of cortex.cpp threads + // if (argc > 1) { + // thread_num = std::atoi(argv[1]); + // } - // // Check for host argument - // if (argc > 2) { - // host = argv[2]; - // } + // // Check for host argument + // if (argc > 2) { + // host = argv[2]; + // } - // // Check for port argument - // if (argc > 3) { - // port = std::atoi(argv[3]); // Convert string argument to int - // } - int thread_num = 1; - std::string host = "127.0.0.1"; - int port = 3928; + // // Check for port argument + // if (argc > 3) { + // port = std::atoi(argv[3]); // Convert string argument to int + // } + int thread_num = 1; + std::string host = "127.0.0.1"; + int port = 3928; - int logical_cores = std::thread::hardware_concurrency(); - int drogon_thread_num = std::max(thread_num, logical_cores); - // cortex_utils::nitro_logo(); + int logical_cores = std::thread::hardware_concurrency(); + int drogon_thread_num = std::max(thread_num, logical_cores); + // cortex_utils::nitro_logo(); #ifdef CORTEX_CPP_VERSION - LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION; + LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION; #else - LOG_INFO << "cortex.cpp version: undefined"; + LOG_INFO << "cortex.cpp version: undefined"; #endif - LOG_INFO << "Server started, listening at: " << host << ":" << port; - LOG_INFO << "Please load your model"; - drogon::app().addListener(host, port); - drogon::app().setThreadNum(drogon_thread_num); - LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); + LOG_INFO << "Server started, listening at: " << host << ":" << port; + LOG_INFO << "Please load your model"; + drogon::app().addListener(host, port); + drogon::app().setThreadNum(drogon_thread_num); + LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); - drogon::app().run(); - // return 0; + drogon::app().run(); + // return 0; } void ForkProcess() { @@ -80,19 +80,21 @@ void ForkProcess() { ZeroMemory(&si, sizeof(si)); si.cb = sizeof(si); ZeroMemory(&pi, sizeof(pi)); - std::string cmds = cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server"; + std::string cmds = + cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server"; // Create child process if (!CreateProcess( NULL, // No module name (use command line) - const_cast(cmds.c_str()), // Command line (replace with your actual executable) - NULL, // Process handle not inheritable - NULL, // Thread handle not inheritable - FALSE, // Set handle inheritance to FALSE - 0, // No creation flags - 
NULL, // Use parent's environment block - NULL, // Use parent's starting directory - &si, // Pointer to STARTUPINFO structure - &pi)) // Pointer to PROCESS_INFORMATION structure + const_cast( + cmds.c_str()), // Command line (replace with your actual executable) + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + FALSE, // Set handle inheritance to FALSE + 0, // No creation flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &si, // Pointer to STARTUPINFO structure + &pi)) // Pointer to PROCESS_INFORMATION structure { std::cout << "Could not start server: " << GetLastError() << std::endl; } else { diff --git a/engine/services/download_service.h b/engine/services/download_service.h index 4efe653bf..a8f7f109b 100644 --- a/engine/services/download_service.h +++ b/engine/services/download_service.h @@ -4,7 +4,7 @@ #include #include -enum class DownloadType { Model, Engine, Miscellaneous }; +enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit }; enum class DownloadStatus { Pending, diff --git a/engine/utils/cuda_toolkit_utils.h b/engine/utils/cuda_toolkit_utils.h new file mode 100644 index 000000000..748af1bd3 --- /dev/null +++ b/engine/utils/cuda_toolkit_utils.h @@ -0,0 +1,63 @@ +#include +#include "utils/semantic_version_utils.h" + +namespace cuda_toolkit_utils { +// those semantic versions are based on: https://docs.nvidia.com/deeplearning/cudnn/latest/reference/support-matrix.html#f1 +inline std::string GetCompatibleCudaToolkitVersion( + const std::string& driver_semantic_version, const std::string& os, + const std::string& engine) { + + if (engine == "cortex.tensorrt-llm") { + // if the engine is cortex.tensorrt-llm, the minimum required CUDA version is 12.4 + if (os == "windows") { + if (semantic_version_utils::CompareSemanticVersion( + driver_semantic_version, "527.41") >= 0) { + return "12.4"; + } else { + throw std::runtime_error( + "GPU driver version not supported. Minimum " + "required driver version is 527.41"); + } + } else if (os == "linux") { + if (semantic_version_utils::CompareSemanticVersion( + driver_semantic_version, "525.60.13") >= 0) { + return "12.4"; + } else { + throw std::runtime_error( + "GPU driver version not supported. Minimum required driver version " + "is 525.60.13"); + } + } else { + throw std::runtime_error("Unsupported OS"); + } + } + + if (os == "windows") { + if (semantic_version_utils::CompareSemanticVersion(driver_semantic_version, + "527.41") >= 0) { + return "12.4"; + } else if (semantic_version_utils::CompareSemanticVersion( + driver_semantic_version, "452.39") >= 0) { + return "11.7"; + } else { + throw std::runtime_error( + "GPU driver version not supported. Minimum " + "required driver version is 452.39"); + } + } else if (os == "linux") { + if (semantic_version_utils::CompareSemanticVersion(driver_semantic_version, + "525.60.13") >= 0) { + return "12.4"; + } else if (semantic_version_utils::CompareSemanticVersion( + driver_semantic_version, "450.80.02") >= 0) { + return "11.7"; + } else { + throw std::runtime_error( + "GPU driver version not supported. 
Minimum " + "required driver version is 450.80.02"); + } + } else { + throw std::runtime_error("Unsupported OS"); + } +} +} // namespace cuda_toolkit_utils \ No newline at end of file diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h index 77c6b74a6..334116fe7 100644 --- a/engine/utils/file_manager_utils.h +++ b/engine/utils/file_manager_utils.h @@ -4,17 +4,63 @@ #include #include +#if defined(__APPLE__) && defined(__MACH__) +#include +#elif defined(__linux__) +#include +#elif defined(_WIN32) +#include +#endif + namespace file_manager_utils { +inline std::filesystem::path GetExecutableFolderContainerPath() { +#if defined(__APPLE__) && defined(__MACH__) + char buffer[1024]; + uint32_t size = sizeof(buffer); + + if (_NSGetExecutablePath(buffer, &size) == 0) { + LOG_INFO << "Executable path: " << buffer; + return std::filesystem::path{buffer}.parent_path(); + } else { + LOG_ERROR << "Failed to get executable path"; + return std::filesystem::current_path(); + } +#elif defined(__linux__) + // TODO: haven't tested + char buffer[1024]; + ssize_t len = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1); + if (len != -1) { + buffer[len] = '\0'; + LOG_INFO << "Executable path: " << buffer; + return std::filesystem::path{buffer}.parent_path(); + } else { + LOG_ERROR << "Failed to get executable path"; + return std::filesystem::current_path(); + } +#elif defined(_WIN32) + // TODO: haven't tested + char buffer[MAX_PATH]; + GetModuleFileNameA(NULL, buffer, MAX_PATH); + LOG_INFO << "Executable path: " << buffer; + return std::filesystem::path{buffer}.parent_path(); +#else + LOG_ERROR << "Unsupported platform!"; + return std::filesystem::current_path(); +#endif +} + inline std::filesystem::path GetContainerFolderPath( const std::string_view type) { - const auto current_path{std::filesystem::current_path()}; + const auto current_path{GetExecutableFolderContainerPath()}; auto container_folder_path = std::filesystem::path{}; if (type == "Model") { container_folder_path = current_path / "models"; } else if (type == "Engine") { container_folder_path = current_path / "engines"; + } else if (type == "CudaToolkit") { + container_folder_path = current_path; } else { container_folder_path = current_path / "misc"; } @@ -35,6 +81,8 @@ inline std::string downloadTypeToString(DownloadType type) { return "Engine"; case DownloadType::Miscellaneous: return "Misc"; + case DownloadType::CudaToolkit: + return "CudaToolkit"; default: return "UNKNOWN"; } diff --git a/engine/utils/semantic_version_utils.h b/engine/utils/semantic_version_utils.h new file mode 100644 index 000000000..ea9244dc1 --- /dev/null +++ b/engine/utils/semantic_version_utils.h @@ -0,0 +1,34 @@ +#include +#include + +namespace semantic_version_utils { +inline std::vector SplitVersion(const std::string& version) { + std::vector parts; + std::stringstream ss(version); + std::string part; + + while (std::getline(ss, part, '.')) { + parts.push_back(std::stoi(part)); + } + + while (parts.size() < 3) { + parts.push_back(0); + } + + return parts; +} + +inline int CompareSemanticVersion(const std::string& version1, + const std::string& version2) { + std::vector v1 = SplitVersion(version1); + std::vector v2 = SplitVersion(version2); + + for (size_t i = 0; i < 3; ++i) { + if (v1[i] < v2[i]) + return -1; + if (v1[i] > v2[i]) + return 1; + } + return 0; +} +} // namespace semantic_version_utils \ No newline at end of file diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index d13935295..16a9570b7 
100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -12,6 +12,7 @@ namespace system_info_utils { constexpr static auto kUnsupported{"Unsupported"}; constexpr static auto kCudaVersionRegex{R"(CUDA Version:\s*([\d\.]+))"}; +constexpr static auto kDriverVersionRegex{R"(Driver Version:\s*(\d+\.\d+))"}; constexpr static auto kGpuQueryCommand{ "nvidia-smi --query-gpu=index,memory.total,name,compute_cap " "--format=csv,noheader,nounits"}; @@ -177,6 +178,31 @@ inline bool IsNvidiaSmiAvailable() { #endif } +inline std::string GetDriverVersion() { + if (!IsNvidiaSmiAvailable()) { + LOG_INFO << "nvidia-smi is not available!"; + return ""; + } + try { + CommandExecutor cmd("nvidia-smi"); + auto output = cmd.execute(); + + const std::regex driver_version_reg(kDriverVersionRegex); + std::smatch match; + + if (std::regex_search(output, match, driver_version_reg)) { + LOG_INFO << "Gpu Driver Version: " << match[1].str(); + return match[1].str(); + } else { + LOG_ERROR << "Gpu Driver not found!"; + return ""; + } + } catch (const std::exception& e) { + LOG_ERROR << "Error: " << e.what(); + return ""; + } +} + inline std::string GetCudaVersion() { if (!IsNvidiaSmiAvailable()) { LOG_INFO << "nvidia-smi is not available!"; @@ -207,6 +233,9 @@ struct GpuInfo { std::string vram; std::string name; std::string arch; + // nvidia driver version. Haven't checked for AMD GPU. + std::optional driver_version; + std::optional cuda_driver_version; std::optional compute_cap; }; @@ -271,6 +300,10 @@ inline std::vector GetGpuInfoList() { std::vector gpuInfoList; try { + // TODO: improve by parsing both in one command execution + auto driver_version = GetDriverVersion(); + auto cuda_version = GetCudaVersion(); + CommandExecutor cmd(kGpuQueryCommand); auto output = cmd.execute(); @@ -285,6 +318,8 @@ inline std::vector GetGpuInfoList() { match[2].str(), // vram match[3].str(), // name GetGpuArch(match[3].str()), // arch + driver_version, // driver_version + cuda_version, // cuda_driver_version match[4].str() // compute_cap }; gpuInfoList.push_back(gpuInfo);
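
// Tying the new utilities together: a minimal usage sketch, assuming an
// NVIDIA driver is installed and nvidia-smi is on the PATH; error handling
// elided:

#include <iostream>
#include "utils/cuda_toolkit_utils.h"
#include "utils/system_info_utils.h"

int main() {
  // e.g. "525.60.13" on Linux; empty string when nvidia-smi is unavailable
  auto driver = system_info_utils::GetDriverVersion();
  // newest toolkit the driver supports on Linux: "12.4" for >= 525.60.13,
  // "11.7" for >= 450.80.02; throws below the minimum
  auto cuda = cuda_toolkit_utils::GetCompatibleCudaToolkitVersion(
      driver, "linux", "cortex.llamacpp");
  std::cout << "compatible CUDA toolkit: " << cuda << "\n";  // "12.4"
  return 0;
}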