diff --git a/.github/scripts/e2e-test-python-linux-and-mac.sh b/.github/scripts/e2e-test-python-linux-and-mac.sh
new file mode 100755
index 000000000..586e1024c
--- /dev/null
+++ b/.github/scripts/e2e-test-python-linux-and-mac.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+
+## Example run command
+# ./e2e-test-python-linux-and-mac.sh '../../examples/build/server' './e2e-test.py'
+
+# Check for required arguments
+if [[ $# -ne 2 ]]; then
+    echo "Usage: $0 <path_to_binary> <path_to_python_file>"
+    exit 1
+fi
+
+BINARY_PATH=$1
+PYTHON_FILE_EXECUTION_PATH=$2
+
+rm /tmp/python-file-execution-res.log /tmp/server.log
+
+# Random port to ensure it's not used
+min=10000
+max=11000
+range=$((max - min + 1))
+PORT=$((RANDOM % range + min))
+
+# Install numpy for Python
+export PYTHONHOME=$(pwd)/engines/cortex.python/python/
+export LD_LIBRARY_PATH="$PYTHONHOME:$LD_LIBRARY_PATH"
+export DYLD_FALLBACK_LIBRARY_PATH="$PYTHONHOME:$DYLD_FALLBACK_LIBRARY_PATH"
+echo "Set Python HOME to $PYTHONHOME"
+echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
+./engines/cortex.python/python/bin/python3 -m ensurepip
+./engines/cortex.python/python/bin/python3 -m pip install --upgrade pip
+./engines/cortex.python/python/bin/python3 -m pip install numpy --target=$PYTHONHOME/lib/python/site-packages/
+
+# Start the binary file
+"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/server.log &
+
+pid=$!
+
+if ! ps -p $pid >/dev/null; then
+    echo "server failed to start. Logs:"
+    cat /tmp/server.log
+    exit 1
+fi
+
+# Wait for a few seconds to let the server start
+sleep 3
+
+# Run the curl commands
+response1=$(curl --connect-timeout 60 -o /tmp/python-file-execution-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/fine_tuning/job" \
+    --header 'Content-Type: application/json' \
+    --data '{
+        "file_execution_path": "'$PYTHON_FILE_EXECUTION_PATH'"
+    }')
+
+error_occurred=0
+
+# Verify the response
+if [[ "$response1" -ne 200 ]]; then
+    echo "The python file execution curl command failed with status code: $response1"
+    cat /tmp/python-file-execution-res.log
+    error_occurred=1
+fi
+
+# Verify the output of the Python file in output.txt
+OUTPUT_FILE="./output.txt"
+EXPECTED_OUTPUT="1 2 3"  # Replace with the expected content
+
+if [[ -f "$OUTPUT_FILE" ]]; then
+    actual_output=$(cat "$OUTPUT_FILE")
+    if [[ "$actual_output" != "$EXPECTED_OUTPUT" ]]; then
+        echo "The output of the Python file does not match the expected output."
+        echo "Expected: $EXPECTED_OUTPUT"
+        echo "Actual: $actual_output"
+        error_occurred=1
+    else
+        echo "The output of the Python file matches the expected output."
+    fi
+else
+    echo "Output file $OUTPUT_FILE does not exist."
+    error_occurred=1
+fi
+
+
+if [[ "$error_occurred" -eq 1 ]]; then
+    echo "Server test run failed!!!!!!!!!!!!!!!!!!!!!!"
+    echo "Server Error Logs:"
+    cat /tmp/server.log
+    kill $pid
+    echo "An error occurred while running the server."
+    exit 1
+fi
+
+echo "----------------------"
+echo "Log server:"
+cat /tmp/server.log
+
+echo "Server test run successfully!"
+
+# Kill the server process
+kill $pid
\ No newline at end of file
diff --git a/.github/scripts/e2e-test-python-windows.bat b/.github/scripts/e2e-test-python-windows.bat
new file mode 100755
index 000000000..2e673f58f
--- /dev/null
+++ b/.github/scripts/e2e-test-python-windows.bat
@@ -0,0 +1,119 @@
+@echo off
+
+setlocal enabledelayedexpansion
+
+set "TEMP=C:\Users\%UserName%\AppData\Local\Temp"
+
+rem Check for required arguments
+if "%~2"=="" (
+    echo Usage: %~0 ^<path_to_binary^> ^<path_to_python_file^>
+    exit /b 1
+)
+
+set "BINARY_PATH=%~1"
+set "PYTHON_FILE_EXECUTION_PATH=%~2"
+
+for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"
+
+echo BINARY_NAME=%BINARY_NAME%
+
+del %TEMP%\response1.log 2>nul
+del %TEMP%\server.log 2>nul
+
+set /a min=9999
+set /a max=11000
+set /a range=max-min+1
+set /a PORT=%min% + %RANDOM% %% %range%
+
+rem Install numpy for Python
+set "PYTHONHOME=%cd%\engines\cortex.python\python"
+echo Set Python HOME to %PYTHONHOME%
+%PYTHONHOME%\python.exe -m ensurepip
+%PYTHONHOME%\python.exe -m pip install --upgrade pip
+%PYTHONHOME%\python.exe -m pip install numpy --target=%PYTHONHOME%\Lib\site-packages\
+
+rem Start the binary file
+start "" /B "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > "%TEMP%\server.log" 2>&1
+
+ping -n 3 127.0.0.1 > nul
+
+rem Capture the PID of the started process with "server" in its name
+for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
+    set "pid=%%a"
+)
+
+echo pid=%pid%
+
+if not defined pid (
+    echo server failed to start. Logs:
+    type %TEMP%\server.log
+    echo.
+    exit /b 1
+)
+
+rem Wait for a few seconds to let the server start
+
+rem Define JSON strings for curl data
+call set "PYTHON_FILE_EXECUTION_PATH_STRING=%%PYTHON_FILE_EXECUTION_PATH:\=\\%%"
+set "curl_data1={\"file_execution_path\":\"%PYTHON_FILE_EXECUTION_PATH_STRING%\"}"
+
+rem Print the values of curl_data for debugging
+echo curl_data1=%curl_data1%
+
+rem Run the curl commands and capture the status code
+curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/fine_tuning/job" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1
+
+set "error_occurred=0"
+
+rem Read the status code directly from the response file
+set "response1="
+for /f %%a in (%TEMP%\response1.log) do set "response1=%%a"
+
+if "%response1%" neq "200" (
+    echo The first curl command failed with status code: %response1%
+    type %TEMP%\response1.log
+    echo.
+    set "error_occurred=1"
+)
+
+echo ----------------------
+echo Log python file execution:
+type %TEMP%\response1.log
+echo.
+
+rem Verification step: Check the contents of output.txt
+set "expected_output=1 2 3"
+set "actual_output="
+if exist "output.txt" (
+    for /f "delims=" %%x in (output.txt) do set "actual_output=%%x"
+    if "!actual_output!"=="!expected_output!" (
+        echo Verification succeeded: output.txt contains the expected data.
+    ) else (
+        echo Verification failed: output.txt does not contain the expected data.
+        echo Expected: !expected_output!
+        echo Actual: !actual_output!
+        set "error_occurred=1"
+    )
+) else (
+    echo Verification failed: output.txt does not exist.
+    set "error_occurred=1"
+)
+
+echo ----------------------
+echo Server logs:
+type %TEMP%\server.log
+echo.
+
+if "%error_occurred%"=="1" (
+    echo Server test run failed!!!!!!!!!!!!!!!!!!!!!!
+    taskkill /f /pid %pid%
+    echo An error occurred while running the server.
+    exit /b 1
+)
+
+echo Server test run successfully!
+ +rem Kill the server process +taskkill /f /im server.exe 2>nul || exit /B 0 + +endlocal \ No newline at end of file diff --git a/.github/scripts/python-file-to-test.py b/.github/scripts/python-file-to-test.py new file mode 100644 index 000000000..e897baf42 --- /dev/null +++ b/.github/scripts/python-file-to-test.py @@ -0,0 +1,9 @@ +import sys; +for path in sys.path: + print(path) + +import numpy as np +print("Numpy version: " + np.__version__) + +with open('output.txt', 'w') as file: + file.write(' '.join(map(str, np.array([1, 2, 3])))) \ No newline at end of file diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml index 33c8a4533..e84efa4e7 100644 --- a/.github/workflows/cortex-cpp-quality-gate.yml +++ b/.github/workflows/cortex-cpp-quality-gate.yml @@ -12,6 +12,7 @@ on: env: LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf + PYTHON_FILE_EXECUTION_PATH: "python-file-to-test.py" jobs: build-and-test: @@ -26,107 +27,126 @@ jobs: runs-on: "ubuntu-18-04" cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF" run-e2e: true + run-python-e2e: true - os: "linux" name: "amd64-avx" runs-on: "ubuntu-18-04" cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF" run-e2e: false + run-python-e2e: false - os: "linux" name: "amd64-avx512" runs-on: "ubuntu-18-04" cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF" run-e2e: false + run-python-e2e: false - os: "linux" name: "amd64-vulkan" runs-on: "ubuntu-18-04-cuda-11-7" cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF" run-e2e: false + run-python-e2e: false - os: "linux" name: "amd64-cuda-11-7" runs-on: "ubuntu-18-04-cuda-11-7" cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON" run-e2e: false + run-python-e2e: false - os: "linux" name: "amd64-cuda-12-0" runs-on: "ubuntu-18-04-cuda-12-0" cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON" run-e2e: false + run-python-e2e: false - os: "mac" name: "amd64" runs-on: "macos-13" cmake-flags: "" run-e2e: true + run-python-e2e: true - os: "mac" name: "arm64" runs-on: "mac-silicon" cmake-flags: "-DMAC_ARM64=ON" run-e2e: true + run-python-e2e: true - os: "windows" name: "amd64-avx2" runs-on: "windows-latest" cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" run-e2e: true + run-python-e2e: true - os: "windows" name: "amd64-avx" runs-on: "windows-latest" cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" run-e2e: false + run-python-e2e: false - os: "windows" name: "amd64-avx512" runs-on: "windows-latest" cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" run-e2e: false + run-python-e2e: false - os: "windows" name: "amd64-vulkan" runs-on: "windows-latest" cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" run-e2e: false + run-python-e2e: false - os: "windows" name: "amd64-avx2-cuda-12-0" runs-on: "windows-cuda-12-0" cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" run-e2e: false + run-python-e2e: false - os: "windows" name: "amd64-avx-cuda-12-0" runs-on: "windows-cuda-12-0" 
cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" run-e2e: false + run-python-e2e: false - os: "windows" name: "amd64-avx512-cuda-12-0" runs-on: "windows-cuda-12-0" cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" run-e2e: false + run-python-e2e: false - os: "windows" name: "amd64-avx2-cuda-11-7" runs-on: "windows-cuda-11-7" cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" run-e2e: false + run-python-e2e: false - os: "windows" name: "amd64-avx-cuda-11-7" runs-on: "windows-cuda-11-7" cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" run-e2e: false + run-python-e2e: false + - os: "windows" name: "amd64-avx512-cuda-11-7" runs-on: "windows-cuda-11-7" cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" run-e2e: false + run-python-e2e: false steps: - name: Clone @@ -161,6 +181,12 @@ jobs: cd cortex-cpp make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }} + - name: Run python e2e testing + if: ${{ matrix.run-python-e2e }} + run: | + cd cortex-cpp + make run-python-e2e-test RUN_TESTS=true PYTHON_FILE_EXECUTION_PATH=${{ env.PYTHON_FILE_EXECUTION_PATH }} + - name: Upload Artifact uses: actions/upload-artifact@v2 with: diff --git a/cortex-cpp/CMakeLists.txt b/cortex-cpp/CMakeLists.txt index 8c01d2256..4e2092afe 100644 --- a/cortex-cpp/CMakeLists.txt +++ b/cortex-cpp/CMakeLists.txt @@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.5) project(cortex-cpp C CXX) include(engines/cortex.llamacpp/engine.cmake) +if(NOT LLAMA_CUDA AND (LLAMA_AVX2 OR APPLE)) + include(engines/cortex.python/engine.cmake) +endif() include(CheckIncludeFileCXX) check_include_file_cxx(any HAS_ANY) diff --git a/cortex-cpp/Makefile b/cortex-cpp/Makefile index 98486f023..20cd4da7f 100644 --- a/cortex-cpp/Makefile +++ b/cortex-cpp/Makefile @@ -6,6 +6,7 @@ CMAKE_EXTRA_FLAGS ?= "" RUN_TESTS ?= false LLM_MODEL_URL ?= "https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" EMBEDDING_MODEL_URL ?= "https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf" +PYTHON_FILE_EXECUTION_PATH ?= "python-file-to-test.py" CODE_SIGN ?= false AZURE_KEY_VAULT_URI ?= xxxx AZURE_CLIENT_ID ?= xxxx @@ -97,6 +98,30 @@ else rm -rf uploads/; endif +run-python-e2e-test: +ifeq ($(RUN_TESTS),false) + @echo "Skipping tests" + @exit 0 +endif +ifeq ($(OS),Windows_NT) + @powershell -Command "cp -r build\engines\cortex.python cortex-cpp\engines\;" + @powershell -Command "cd cortex-cpp;..\..\.github\scripts\e2e-test-python-windows.bat cortex-cpp.exe ..\..\.github\scripts\$(PYTHON_FILE_EXECUTION_PATH);" + @powershell -Command "cd .\cortex-cpp\; rm *.txt;" + @powershell -Command "mkdir python-temp; cd .\cortex-cpp\engines; Move-Item -Path .\cortex.python -Destination ..\..\python-temp;" +else ifeq ($(shell uname -s),Linux) + @cp -rf build/engines/cortex.python cortex-cpp/engines/; \ + cd cortex-cpp; \ + chmod +x ../../.github/scripts/e2e-test-python-linux-and-mac.sh && ../../.github/scripts/e2e-test-python-linux-and-mac.sh ./cortex-cpp 
../../.github/scripts/$(PYTHON_FILE_EXECUTION_PATH); \ + rm -rf uploads/; \ + rm -rf ./engines/cortex.python; +else + @cp -rf build/engines/cortex.python cortex-cpp/engines/; \ + cd cortex-cpp; \ + chmod +x ../../.github/scripts/e2e-test-python-linux-and-mac.sh && ../../.github/scripts/e2e-test-python-linux-and-mac.sh ./cortex-cpp ../../.github/scripts/$(PYTHON_FILE_EXECUTION_PATH); \ + rm -rf uploads/; \ + rm -rf ./engines/cortex.python; +endif + clean: ifeq ($(OS),Windows_NT) @powershell -Command "rm -rf build; rm -rf build-deps; rm -rf cortex-cpp; rm -rf cortex-cpp.tar.gz;" diff --git a/cortex-cpp/common/base.h b/cortex-cpp/common/base.h index c6e3aa6a9..382f21b80 100644 --- a/cortex-cpp/common/base.h +++ b/cortex-cpp/common/base.h @@ -8,16 +8,23 @@ class BaseModel { virtual ~BaseModel() {} // Model management - virtual void LoadModel(const HttpRequestPtr& req, - std::function&& callback) = 0; + virtual void LoadModel( + const HttpRequestPtr& req, + std::function&& callback) = 0; virtual void UnloadModel( const HttpRequestPtr& req, std::function&& callback) = 0; virtual void ModelStatus( const HttpRequestPtr& req, std::function&& callback) = 0; - - virtual void GetModels(const HttpRequestPtr& req, + virtual void GetModels( + const HttpRequestPtr& req, + std::function&& callback) = 0; + virtual void GetEngines( + const HttpRequestPtr& req, + std::function&& callback) = 0; + virtual void FineTuning( + const HttpRequestPtr& req, std::function&& callback) = 0; }; diff --git a/cortex-cpp/controllers/server.cc b/cortex-cpp/controllers/server.cc index 1fdb74509..225775d3d 100644 --- a/cortex-cpp/controllers/server.cc +++ b/cortex-cpp/controllers/server.cc @@ -13,24 +13,25 @@ using json = nlohmann::json; namespace inferences { namespace { constexpr static auto kLlamaEngine = "cortex.llamacpp"; -constexpr static auto kLlamaLibPath = "/engines/cortex.llamacpp"; +constexpr static auto kPythonRuntimeEngine = "cortex.python"; } // namespace -server::server() - : engine_{nullptr} { +server::server(){ - // Some default values for now below - // log_disable(); // Disable the log to file feature, reduce bloat for - // target - // system () - }; + // Some default values for now below + // log_disable(); // Disable the log to file feature, reduce bloat for + // target + // system () +}; server::~server() {} void server::ChatCompletion( const HttpRequestPtr& req, std::function&& callback) { - if (!IsEngineLoaded()) { + auto engine_type = + (*(req->getJsonObject())).get("engine", kLlamaEngine).asString(); + if (!IsEngineLoaded(engine_type)) { Json::Value res; res["message"] = "Engine is not loaded yet"; auto resp = cortex_utils::nitroHttpJsonResponse(res); @@ -44,10 +45,11 @@ void server::ChatCompletion( auto json_body = req->getJsonObject(); bool is_stream = (*json_body).get("stream", false).asBool(); auto q = std::make_shared(); - engine_->HandleChatCompletion(json_body, - [q](Json::Value status, Json::Value res) { - q->push(std::make_pair(status, res)); - }); + std::get(engines_[engine_type].engine) + ->HandleChatCompletion(json_body, + [q](Json::Value status, Json::Value res) { + q->push(std::make_pair(status, res)); + }); LOG_TRACE << "Wait to chat completion responses"; if (is_stream) { ProcessStreamRes(std::move(callback), q); @@ -59,7 +61,9 @@ void server::ChatCompletion( void server::Embedding(const HttpRequestPtr& req, std::function&& callback) { - if (!IsEngineLoaded()) { + auto engine_type = + (*(req->getJsonObject())).get("engine", kLlamaEngine).asString(); + if (!IsEngineLoaded(engine_type)) { 
Json::Value res; res["message"] = "Engine is not loaded yet"; auto resp = cortex_utils::nitroHttpJsonResponse(res); @@ -71,10 +75,11 @@ void server::Embedding(const HttpRequestPtr& req, LOG_TRACE << "Start embedding"; SyncQueue q; - engine_->HandleEmbedding(req->getJsonObject(), - [&q](Json::Value status, Json::Value res) { - q.push(std::make_pair(status, res)); - }); + std::get(engines_[engine_type].engine) + ->HandleEmbedding(req->getJsonObject(), + [&q](Json::Value status, Json::Value res) { + q.push(std::make_pair(status, res)); + }); LOG_TRACE << "Wait to embedding"; ProcessNonStreamRes(std::move(callback), q); LOG_TRACE << "Done embedding"; @@ -83,7 +88,9 @@ void server::Embedding(const HttpRequestPtr& req, void server::UnloadModel( const HttpRequestPtr& req, std::function&& callback) { - if (!IsEngineLoaded()) { + auto engine_type = + (*(req->getJsonObject())).get("engine", kLlamaEngine).asString(); + if (!IsEngineLoaded(engine_type)) { Json::Value res; res["message"] = "Engine is not loaded yet"; auto resp = cortex_utils::nitroHttpJsonResponse(res); @@ -93,21 +100,24 @@ void server::UnloadModel( return; } LOG_TRACE << "Start unload model"; - engine_->UnloadModel( - req->getJsonObject(), - [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = cortex_utils::nitroHttpJsonResponse(res); - resp->setStatusCode( - static_cast(status["status_code"].asInt())); - cb(resp); - }); + std::get(engines_[engine_type].engine) + ->UnloadModel( + req->getJsonObject(), + [cb = std::move(callback)](Json::Value status, Json::Value res) { + auto resp = cortex_utils::nitroHttpJsonResponse(res); + resp->setStatusCode(static_cast( + status["status_code"].asInt())); + cb(resp); + }); LOG_TRACE << "Done unload model"; } void server::ModelStatus( const HttpRequestPtr& req, std::function&& callback) { - if (!IsEngineLoaded()) { + auto engine_type = + (*(req->getJsonObject())).get("engine", kLlamaEngine).asString(); + if (!IsEngineLoaded(engine_type)) { Json::Value res; res["message"] = "Engine is not loaded yet"; auto resp = cortex_utils::nitroHttpJsonResponse(res); @@ -118,20 +128,23 @@ void server::ModelStatus( } LOG_TRACE << "Start to get model status"; - engine_->GetModelStatus( - req->getJsonObject(), - [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = cortex_utils::nitroHttpJsonResponse(res); - resp->setStatusCode( - static_cast(status["status_code"].asInt())); - cb(resp); - }); + std::get(engines_[engine_type].engine) + ->GetModelStatus( + req->getJsonObject(), + [cb = std::move(callback)](Json::Value status, Json::Value res) { + auto resp = cortex_utils::nitroHttpJsonResponse(res); + resp->setStatusCode(static_cast( + status["status_code"].asInt())); + cb(resp); + }); LOG_TRACE << "Done get model status"; } void server::GetModels(const HttpRequestPtr& req, std::function&& callback) { - if (!IsEngineLoaded()) { + auto engine_type = + (*(req->getJsonObject())).get("engine", kLlamaEngine).asString(); + if (!IsEngineLoaded(engine_type)) { Json::Value res; res["message"] = "Engine is not loaded yet"; auto resp = cortex_utils::nitroHttpJsonResponse(res); @@ -142,8 +155,9 @@ void server::GetModels(const HttpRequestPtr& req, } LOG_TRACE << "Start to get models"; - if (engine_->IsSupported("GetModels")) { - engine_->GetModels( + auto& en = std::get(engines_[engine_type].engine); + if (en->IsSupported("GetModels")) { + en->GetModels( req->getJsonObject(), [cb = std::move(callback)](Json::Value status, Json::Value res) { auto resp = 
cortex_utils::nitroHttpJsonResponse(res); @@ -163,52 +177,127 @@ void server::GetModels(const HttpRequestPtr& req, LOG_TRACE << "Done get models"; } +void server::GetEngines( + const HttpRequestPtr& req, + std::function&& callback) { + Json::Value res; + Json::Value engine_array(Json::arrayValue); + for (const auto& [s, _] : engines_) { + Json::Value val; + val["id"] = s; + val["object"] = "engine"; + engine_array.append(val); + } + + res["object"] = "list"; + res["data"] = engine_array; + + auto resp = cortex_utils::nitroHttpJsonResponse(res); + callback(resp); +} + +void server::FineTuning( + const HttpRequestPtr& req, + std::function&& callback) { + auto engine_type = + (*(req->getJsonObject())).get("engine", kPythonRuntimeEngine).asString(); + + if (engines_.find(engine_type) == engines_.end()) { + try { + std::string abs_path = + cortex_utils::GetCurrentPath() + cortex_utils::kPythonRuntimeLibPath; + engines_[engine_type].dl = + std::make_unique(abs_path, "engine"); + } catch (const cortex_cpp::dylib::load_error& e) { + + LOG_ERROR << "Could not load engine: " << e.what(); + engines_.erase(engine_type); + + Json::Value res; + res["message"] = "Could not load engine " + engine_type; + auto resp = cortex_utils::nitroHttpJsonResponse(res); + resp->setStatusCode(k500InternalServerError); + callback(resp); + return; + } + + auto func = engines_[engine_type].dl->get_function( + "get_engine"); + engines_[engine_type].engine = func(); + LOG_INFO << "Loaded engine: " << engine_type; + } + + LOG_TRACE << "Start to fine-tuning"; + auto& en = std::get(engines_[engine_type].engine); + if (en->IsSupported("HandlePythonFileExecutionRequest")) { + en->HandlePythonFileExecutionRequest( + req->getJsonObject(), + [cb = std::move(callback)](Json::Value status, Json::Value res) { + auto resp = cortex_utils::nitroHttpJsonResponse(res); + resp->setStatusCode(static_cast( + status["status_code"].asInt())); + cb(resp); + }); + } else { + Json::Value res; + res["message"] = "Method is not supported yet"; + auto resp = cortex_utils::nitroHttpJsonResponse(res); + resp->setStatusCode(k500InternalServerError); + callback(resp); + LOG_WARN << "Method is not supported yet"; + } + LOG_TRACE << "Done fine-tuning"; +} + void server::LoadModel(const HttpRequestPtr& req, std::function&& callback) { auto engine_type = (*(req->getJsonObject())).get("engine", kLlamaEngine).asString(); - if (!dylib_ || engine_type != cur_engine_name_) { - cur_engine_name_ = engine_type; - // TODO: change this when we get more engines + + // We have not loaded engine yet, should load it before using it + if (engines_.find(engine_type) == engines_.end()) { + // TODO(sang) we cannot run cortex.llamacpp and cortex.tensorrt-llm at the same time. + // So need an unload engine machanism to handle. 
auto get_engine_path = [](std::string_view e) { if (e == kLlamaEngine) { - return kLlamaLibPath; + return cortex_utils::kLlamaLibPath; } - return kLlamaLibPath; + return cortex_utils::kLlamaLibPath; }; try { std::string abs_path = - cortex_utils::GetCurrentPath() + get_engine_path(cur_engine_name_); - dylib_ = std::make_unique(abs_path, "engine"); + cortex_utils::GetCurrentPath() + get_engine_path(engine_type); + engines_[engine_type].dl = + std::make_unique(abs_path, "engine"); + } catch (const cortex_cpp::dylib::load_error& e) { LOG_ERROR << "Could not load engine: " << e.what(); - dylib_.reset(); - engine_ = nullptr; - } + engines_.erase(engine_type); - if (!dylib_) { Json::Value res; - res["message"] = "Could not load engine " + cur_engine_name_; + res["message"] = "Could not load engine " + engine_type; auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode(k500InternalServerError); callback(resp); return; } - auto func = dylib_->get_function("get_engine"); - engine_ = func(); - LOG_INFO << "Loaded engine: " << cur_engine_name_; + + auto func = + engines_[engine_type].dl->get_function("get_engine"); + engines_[engine_type].engine = func(); + LOG_INFO << "Loaded engine: " << engine_type; } LOG_TRACE << "Load model"; - engine_->LoadModel( - req->getJsonObject(), - [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = cortex_utils::nitroHttpJsonResponse(res); - resp->setStatusCode( - static_cast(status["status_code"].asInt())); - cb(resp); - }); + auto& en = std::get(engines_[engine_type].engine); + en->LoadModel(req->getJsonObject(), [cb = std::move(callback)]( + Json::Value status, Json::Value res) { + auto resp = cortex_utils::nitroHttpJsonResponse(res); + resp->setStatusCode( + static_cast(status["status_code"].asInt())); + cb(resp); + }); LOG_TRACE << "Done load model"; } @@ -255,8 +344,8 @@ void server::ProcessNonStreamRes(std::function cb, cb(resp); } -bool server::IsEngineLoaded() { - return !!engine_; +bool server::IsEngineLoaded(const std::string& e) { + return engines_.find(e) != engines_.end(); } } // namespace inferences \ No newline at end of file diff --git a/cortex-cpp/controllers/server.h b/cortex-cpp/controllers/server.h index c6d67ee30..3f214a9f0 100644 --- a/cortex-cpp/controllers/server.h +++ b/cortex-cpp/controllers/server.h @@ -14,9 +14,11 @@ #include #include #include +#include #include "common/base.h" #include "cortex-common/EngineI.h" +#include "cortex-common/cortexpythoni.h" #include "trantor/utils/SerialTaskQueue.h" #include "utils/dylib.h" #include "utils/json.hpp" @@ -31,9 +33,9 @@ using namespace drogon; namespace inferences { class server : public drogon::HttpController, - public BaseModel, - public BaseChatCompletion, - public BaseEmbedding { + public BaseModel, + public BaseChatCompletion, + public BaseEmbedding { struct SyncQueue; public: @@ -47,11 +49,16 @@ class server : public drogon::HttpController, METHOD_ADD(server::UnloadModel, "unloadmodel", Post); METHOD_ADD(server::ModelStatus, "modelstatus", Post); METHOD_ADD(server::GetModels, "models", Get); - + METHOD_ADD(server::GetEngines, "engines", Get); + + // cortex.python API + METHOD_ADD(server::FineTuning, "finetuning", Post); // Openai compatible path ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Post); ADD_METHOD_TO(server::GetModels, "/v1/models", Get); + ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Post); + // ADD_METHOD_TO(server::handlePrelight, "/v1/chat/completions", Options); // NOTE: prelight will be added back when 
browser support is properly planned @@ -78,13 +85,19 @@ class server : public drogon::HttpController, void GetModels( const HttpRequestPtr& req, std::function&& callback) override; + void GetEngines( + const HttpRequestPtr& req, + std::function&& callback) override; + void FineTuning( + const HttpRequestPtr& req, + std::function&& callback) override; private: void ProcessStreamRes(std::function cb, std::shared_ptr q); void ProcessNonStreamRes(std::function cb, SyncQueue& q); - bool IsEngineLoaded(); + bool IsEngineLoaded(const std::string& e); private: struct SyncQueue { @@ -126,8 +139,11 @@ class server : public drogon::HttpController, }; private: - std::unique_ptr dylib_; - EngineI* engine_; - std::string cur_engine_name_; + using EngineV = std::variant; + struct EngineInfo { + std::unique_ptr dl; + EngineV engine; + }; + std::unordered_map engines_; }; }; // namespace inferences \ No newline at end of file diff --git a/cortex-cpp/cortex-common/EngineI.h b/cortex-cpp/cortex-common/EngineI.h index 4246c8ade..c5dcc8afe 100644 --- a/cortex-cpp/cortex-common/EngineI.h +++ b/cortex-cpp/cortex-common/EngineI.h @@ -10,20 +10,21 @@ class EngineI { public: virtual ~EngineI() {} + // cortex.llamacpp interface virtual void HandleChatCompletion( - std::shared_ptr jsonBody, + std::shared_ptr json_body, std::function&& callback) = 0; virtual void HandleEmbedding( - std::shared_ptr jsonBody, + std::shared_ptr json_body, std::function&& callback) = 0; virtual void LoadModel( - std::shared_ptr jsonBody, + std::shared_ptr json_body, std::function&& callback) = 0; virtual void UnloadModel( - std::shared_ptr jsonBody, + std::shared_ptr json_body, std::function&& callback) = 0; virtual void GetModelStatus( - std::shared_ptr jsonBody, + std::shared_ptr json_body, std::function&& callback) = 0; // For backward compatible checking diff --git a/cortex-cpp/cortex-common/cortexpythoni.h b/cortex-cpp/cortex-common/cortexpythoni.h new file mode 100644 index 000000000..06a79838f --- /dev/null +++ b/cortex-cpp/cortex-common/cortexpythoni.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +#include "json/value.h" + +class CortexPythonEngineI { + public: + virtual ~CortexPythonEngineI() {} + + virtual bool IsSupported(const std::string& f) = 0; + + virtual void ExecutePythonFile(std::string binary_execute_path, + std::string file_execution_path, + std::string python_library_path) = 0; + + virtual void HandlePythonFileExecutionRequest( + std::shared_ptr json_body, + std::function&& callback) = 0; +}; + diff --git a/cortex-cpp/cortex-cpp-deps/CMakeLists.txt b/cortex-cpp/cortex-cpp-deps/CMakeLists.txt index 4e080a026..d6feb5991 100644 --- a/cortex-cpp/cortex-cpp-deps/CMakeLists.txt +++ b/cortex-cpp/cortex-cpp-deps/CMakeLists.txt @@ -66,7 +66,7 @@ ExternalProject_Add( ExternalProject_Add( drogon GIT_REPOSITORY https://github.com/drogonframework/drogon - GIT_TAG v1.9.2 + GIT_TAG v1.9.4 CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DOPENSSL_USE_STATIC_LIBS=TRUE diff --git a/cortex-cpp/engines/cortex.python/engine.cmake b/cortex-cpp/engines/cortex.python/engine.cmake new file mode 100644 index 000000000..fa6705fde --- /dev/null +++ b/cortex-cpp/engines/cortex.python/engine.cmake @@ -0,0 +1,38 @@ +# cortex.python release version +set(VERSION 0.1.5) +set(ENGINE_VERSION v${VERSION}) +set(ENGINE_NAME cortex.python) + +# MESSAGE("ENGINE_VERSION=" ${ENGINE_VERSION}) + +# Download library based on instructions +if(UNIX AND NOT APPLE) + set(LIBRARY_NAME ${ENGINE_NAME}-${VERSION}-linux-amd64.tar.gz) +elseif(UNIX) + 
if(MAC_ARM64) + set(LIBRARY_NAME ${ENGINE_NAME}-${VERSION}-mac-arm64.tar.gz) + else() + set(LIBRARY_NAME ${ENGINE_NAME}-${VERSION}-mac-amd64.tar.gz) + endif() +else() + set(LIBRARY_NAME ${ENGINE_NAME}-${VERSION}-windows-amd64.tar.gz) +endif() + + +set(LIBPYTHONRUNTIME_ENGINE_URL https://github.com/janhq/cortex.python/releases/download/${ENGINE_VERSION}/${LIBRARY_NAME}) +MESSAGE("LIBPYTHONRUNTIME_ENGINE_URL=" ${LIBPYTHONRUNTIME_ENGINE_URL}) +MESSAGE("LIBARRY_NAME=" ${LIBRARY_NAME}) +set(LIBPYTHONRUNTIME_ENGINE_PATH ${CMAKE_BINARY_DIR}/engines/${LIBRARY_NAME}) + +# MESSAGE("CMAKE_BINARY_DIR = " ${CMAKE_BINARY_DIR}) + +file(DOWNLOAD ${LIBPYTHONRUNTIME_ENGINE_URL} ${LIBPYTHONRUNTIME_ENGINE_PATH} STATUS LIBPYTHONRUNTIME_ENGINE_DOWNLOAD_STATUS) +list(GET LIBPYTHONRUNTIME_ENGINE_DOWNLOAD_STATUS 0 LIBPYTHONRUNTIME_ENGINE_DOWNLOAD_STATUS_NO) +# MESSAGE("file = " ${CMAKE_BINARY_DIR}/engines/${LIBRARY_NAME}) + +if(LIBPYTHONRUNTIME_ENGINE_DOWNLOAD_STATUS_NO) + message(STATUS "Pre-built library not downloaded. (${LIBPYTHONRUNTIME_ENGINE_DOWNLOAD_STATUS})") +else() + message(STATUS "Linking downloaded pre-built library.") + file(ARCHIVE_EXTRACT INPUT ${CMAKE_BINARY_DIR}/engines/${LIBRARY_NAME} DESTINATION ${CMAKE_BINARY_DIR}/engines/) +endif() \ No newline at end of file diff --git a/cortex-cpp/main.cc b/cortex-cpp/main.cc index 12cabeb0c..04190d92d 100644 --- a/cortex-cpp/main.cc +++ b/cortex-cpp/main.cc @@ -2,7 +2,9 @@ #include #include // for PATH_MAX #include +#include "cortex-common/cortexpythoni.h" #include "utils/cortex_utils.h" +#include "utils/dylib.h" #if defined(__APPLE__) && defined(__MACH__) #include // for dirname() @@ -18,6 +20,27 @@ #endif int main(int argc, char* argv[]) { + // Check if this process is for python execution + if (argc > 1) { + if (strcmp(argv[1], "--run_python_file") == 0) { + std::string py_home_path = (argc > 3) ? argv[3] : ""; + std::unique_ptr dl; + try { + std::string abs_path = cortex_utils::GetCurrentPath() + + cortex_utils::kPythonRuntimeLibPath; + dl = std::make_unique(abs_path, "engine"); + } catch (const cortex_cpp::dylib::load_error& e) { + LOG_ERROR << "Could not load engine: " << e.what(); + return 1; + } + + auto func = dl->get_function("get_engine"); + auto e = func(); + e->ExecutePythonFile(argv[0], argv[2], py_home_path); + return 0; + } + } + int thread_num = 1; std::string host = "127.0.0.1"; int port = 3928; diff --git a/cortex-cpp/utils/cortex_utils.h b/cortex-cpp/utils/cortex_utils.h index 3c4fdd2dd..c0670a431 100644 --- a/cortex-cpp/utils/cortex_utils.h +++ b/cortex-cpp/utils/cortex_utils.h @@ -25,6 +25,8 @@ #endif namespace cortex_utils { +constexpr static auto kLlamaLibPath = "/engines/cortex.llamacpp"; +constexpr static auto kPythonRuntimeLibPath = "/engines/cortex.python"; inline std::string models_folder = "./models";
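
Taken together, the e2e scripts and the new --run_python_file branch in main.cc give two ways to run a Python file through cortex.python. The commands below are an illustrative sketch only: the port, script path, and PYTHONHOME location are placeholders, and the binary is assumed to sit next to its engines/ directory as the Makefile targets arrange.

# 1. Direct execution, mirroring the argv handling added in main.cc:
#    argv[1] = --run_python_file, argv[2] = script, argv[3] = optional Python home.
./cortex-cpp --run_python_file ./python-file-to-test.py ./engines/cortex.python/python

# 2. Through the HTTP API, as the e2e scripts do; the "engine" field defaults
#    to cortex.python in the FineTuning handler, so it can be omitted here.
./cortex-cpp 1 127.0.0.1 3928 &
curl -s --location "http://127.0.0.1:3928/v1/fine_tuning/job" \
  --header 'Content-Type: application/json' \
  --data '{"file_execution_path": "./python-file-to-test.py"}'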
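On the server side, the single dylib_/engine_ pair is replaced by a map from engine name to an EngineInfo that holds the loaded dylib and a variant of engine-interface pointers; each handler looks the engine up by the request's "engine" field and dispatches with std::get. A minimal, self-contained sketch of that pattern follows; the engine structs, their methods, and main() are stand-ins, and the real code obtains the pointers from each dylib's get_engine symbol and passes drogon request/response callbacks.

// Simplified sketch of the engines_ registry introduced in server.h/server.cc (C++17).
#include <iostream>
#include <string>
#include <unordered_map>
#include <variant>

struct EngineI {                      // stand-in for the llama.cpp inference interface
  void HandleChatCompletion() { std::cout << "chat completion\n"; }
};
struct CortexPythonEngineI {          // stand-in for the Python runtime interface
  void HandlePythonFileExecutionRequest() { std::cout << "run python file\n"; }
};

using EngineV = std::variant<CortexPythonEngineI*, EngineI*>;

struct EngineInfo {
  // The real EngineInfo also owns the cortex_cpp::dylib the engine was loaded from.
  EngineV engine;
};

std::unordered_map<std::string, EngineInfo> engines_;

bool IsEngineLoaded(const std::string& e) {
  return engines_.find(e) != engines_.end();
}

int main() {
  static EngineI llama;               // the real code gets these pointers from
  static CortexPythonEngineI python;  // each dylib's exported get_engine()
  engines_["cortex.llamacpp"].engine = &llama;
  engines_["cortex.python"].engine = &python;

  // Dispatch the way ChatCompletion and FineTuning do in the diff:
  std::get<EngineI*>(engines_["cortex.llamacpp"].engine)->HandleChatCompletion();
  std::get<CortexPythonEngineI*>(engines_["cortex.python"].engine)
      ->HandlePythonFileExecutionRequest();

  // Not loaded in this sketch, so the lookup-based check reports false.
  std::cout << std::boolalpha << IsEngineLoaded("cortex.tensorrt-llm") << "\n";
}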