diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml index 85050581a..e9fd8664b 100644 --- a/.github/workflows/cortex-cpp-quality-gate.yml +++ b/.github/workflows/cortex-cpp-quality-gate.yml @@ -5,6 +5,8 @@ on: types: [opened, synchronize, reopened, ready_for_review] paths: ["engine/**", ".github/workflows/cortex-cpp-quality-gate.yml"] workflow_dispatch: + schedule: + - cron: '0 22 * * *' env: LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf @@ -102,7 +104,7 @@ jobs: cd engine echo "huggingFaceToken: ${{ secrets.HUGGINGFACE_TOKEN_READ }}" > ~/.cortexrc echo "gitHubToken: ${{ secrets.PAT_SERVICE_ACCOUNT }}" >> ~/.cortexrc - ./build/cortex + # ./build/cortex cat ~/.cortexrc - name: Run unit tests @@ -115,10 +117,10 @@ jobs: - name: Run setup config run: | cd engine - echo "huggingFaceToken: ${{ secrets.HUGGINGFACE_TOKEN_READ }}" > ~/.cortexrc + echo "apiServerPort: 3928" > ~/.cortexrc + echo "huggingFaceToken: ${{ secrets.HUGGINGFACE_TOKEN_READ }}" >> ~/.cortexrc echo "gitHubToken: ${{ secrets.PAT_SERVICE_ACCOUNT }}" >> ~/.cortexrc - echo "apiServerPort: 3928" >> ~/.cortexrc - ./build/cortex + # ./build/cortex cat ~/.cortexrc - name: Run e2e tests @@ -149,6 +151,34 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }} + - name: Run e2e tests + if: github.event_name == 'schedule' && runner.os != 'Windows' && github.event.pull_request.draft == false + run: | + cd engine + cp build/cortex build/cortex-nightly + cp build/cortex build/cortex-beta + python -m pip install --upgrade pip + python -m pip install -r e2e-test/requirements.txt + python e2e-test/cortex-llamacpp-e2e-nightly.py + rm build/cortex-nightly + rm build/cortex-beta + env: + GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }} + + - name: Run e2e tests + if: github.event_name == 'schedule' && runner.os == 'Windows' && github.event.pull_request.draft == false + run: | + cd engine + cp build/cortex.exe build/cortex-nightly.exe + cp build/cortex.exe build/cortex-beta.exe + python -m pip install --upgrade pip + python -m pip install -r e2e-test/requirements.txt + python e2e-test/cortex-llamacpp-e2e-nightly.py + rm build/cortex-nightly.exe + rm build/cortex-beta.exe + env: + GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }} + - name: Pre-package run: | cd engine @@ -188,40 +218,58 @@ jobs: AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" - # build-docker-and-test: - # runs-on: ubuntu-latest - # steps: - # - name: Getting the repo - # uses: actions/checkout@v3 - # with: - # submodules: 'recursive' + build-docker-and-test: + runs-on: ubuntu-24-04-docker + steps: + - name: Getting the repo + uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Run Docker + if: github.event_name != 'schedule' + run: | + docker build \ + --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \ + --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \ + --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \ + --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \ + -t menloltd/cortex:test -f docker/Dockerfile.cache . 
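+          # Smoke-test the image: the API server listens on 39281 inside the container and is published on host port 3928 for the e2e suite.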
+ docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test + sleep 20 - # - name: Set up QEMU - # uses: docker/setup-qemu-action@v3 + - name: Run Docker + if: github.event_name == 'schedule' + run: | + latest_prerelease=$(curl -s https://api.github.com/repos/cortexcpp/cortex.cpp/releases | jq -r '.[] | select(.prerelease == true) | .tag_name' | head -n 1) + echo "cortex.llamacpp latest release: $latest_prerelease" + docker build \ + --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \ + --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \ + --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \ + --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \ + --build-arg CORTEX_CPP_VERSION="${latest_prerelease}" \ + -t menloltd/cortex:test -f docker/Dockerfile.cache . + docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test + sleep 20 - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v3 - - # - name: Run Docker - # run: | - # docker build -t menloltd/cortex:test -f docker/Dockerfile . - # docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test - - # - name: use python - # uses: actions/setup-python@v5 - # with: - # python-version: "3.10" - - # - name: Run e2e tests - # run: | - # cd engine - # python -m pip install --upgrade pip - # python -m pip install -r e2e-test/requirements.txt - # pytest e2e-test/test_api_docker.py - - # - name: Run Docker - # continue-on-error: true - # if: always() - # run: | - # docker stop cortex - # docker rm cortex + - name: use python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Run e2e tests + run: | + cd engine + python -m pip install --upgrade pip + python -m pip install -r e2e-test/requirements.txt + pytest e2e-test/test_api_docker.py + + - name: Run Docker + continue-on-error: true + if: always() + run: | + docker logs cortex + docker stop cortex + docker rm cortex + echo "y\n" | docker system prune -af diff --git a/docker/Dockerfile b/docker/Dockerfile index 4cbdbef29..744c3899c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,16 +1,12 @@ -FROM ubuntu:22.04 as base - -FROM base as build - -ARG CORTEX_CPP_VERSION=latest - -ARG CMAKE_EXTRA_FLAGS="" +# Stage 1: Base dependencies (common stage) +FROM ubuntu:22.04 as common ENV DEBIAN_FRONTEND=noninteractive -# Install dependencies +# Install common dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ ca-certificates \ + software-properties-common \ curl \ wget \ jq \ @@ -20,11 +16,19 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN apt-get update && apt-get install -y --no-install-recommends \ +# Stage 2: Build dependencies and compilation +FROM common as build + +# Install Dependencies +RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \ + apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + cmake \ + make \ git \ uuid-dev \ lsb-release \ - software-properties-common \ gpg \ zip \ unzip \ @@ -32,59 +36,45 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ g++ \ ninja-build \ pkg-config \ + python3-pip \ openssl && \ + pip3 install awscli && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN wget -O - 
https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \ - apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" && \ - apt-get update && \ - apt-get install -y cmake && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* +ARG CORTEX_CPP_VERSION=latest +ARG CMAKE_EXTRA_FLAGS="" WORKDIR /app +# Copy source code COPY ./engine /app/engine - COPY ./docs/static/openapi/cortex.json /app/docs/static/openapi/cortex.json +# Build project +# Configure vcpkg binary sources RUN cd engine && make configure-vcpkg && make build CMAKE_EXTRA_FLAGS="-DCORTEX_CPP_VERSION=${CORTEX_CPP_VERSION} -DCMAKE_BUILD_TEST=OFF -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake ${CMAKE_EXTRA_FLAGS}" -FROM base as runtime - -ENV DEBIAN_FRONTEND=noninteractive - -# Install dependencies -RUN apt-get update && apt-get install -y --no-install-recommends \ - ca-certificates \ - curl \ - wget \ - jq \ - tar \ - openmpi-bin \ - libopenmpi-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -ARG CORTEX_LLAMACPP_VERSION=latest - +# Stage 3: Runtime +FROM common as runtime +WORKDIR /app COPY --from=build /app/engine/build/cortex /usr/local/bin/cortex COPY --from=build /app/engine/build/cortex-server /usr/local/bin/cortex-server COPY ./docker/download-cortex.llamacpp.sh /tmp/download-cortex.llamacpp.sh +COPY ./docker/entrypoint.sh /usr/local/bin/entrypoint.sh -# Get the latest version of the Cortex Llama +# Get the latest version of Cortex Llama +ARG CORTEX_LLAMACPP_VERSION=latest RUN chmod +x /tmp/download-cortex.llamacpp.sh && /bin/bash /tmp/download-cortex.llamacpp.sh ${CORTEX_LLAMACPP_VERSION} -# Copy the entrypoint script -COPY ./docker/entrypoint.sh /usr/local/bin/entrypoint.sh - +# Configure entrypoint RUN chmod +x /usr/local/bin/entrypoint.sh EXPOSE 39281 +# Healthcheck HEALTHCHECK --interval=300s --timeout=30s --start-period=10s --retries=3 \ CMD curl -f http://127.0.0.1:39281/healthz || exit 1 - + ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] diff --git a/docker/Dockerfile.cache b/docker/Dockerfile.cache new file mode 100644 index 000000000..0a9cbe02d --- /dev/null +++ b/docker/Dockerfile.cache @@ -0,0 +1,121 @@ +# Stage 1: Base dependencies (common stage) +FROM ubuntu:22.04 as common + +ENV DEBIAN_FRONTEND=noninteractive + +# Install common dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + software-properties-common \ + curl \ + wget \ + jq \ + tar \ + openmpi-bin \ + libopenmpi-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Stage 2: Build dependencies and compilation +FROM common as build + +# Install Dependencies +RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \ + apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + cmake \ + make \ + git \ + uuid-dev \ + lsb-release \ + gpg \ + zip \ + unzip \ + gcc \ + g++ \ + ninja-build \ + pkg-config \ + python3-pip \ + openssl && \ + pip3 install awscli && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ARG CORTEX_CPP_VERSION=latest +ARG CMAKE_EXTRA_FLAGS="" + +WORKDIR /app + +# Build arguments for remote cache +ARG REMOTE_CACHE_URL="" +ARG MINIO_ENDPOINT_URL="" +ARG MINIO_ACCESS_KEY="" +ARG MINIO_SECRET_KEY="" + +# Configure cache +ENV 
LOCAL_CACHE_DIR="/vcpkg-cache" +RUN mkdir -p ${LOCAL_CACHE_DIR} + +# Configure MinIO alias (if remote cache is provided) +RUN if [ -n "${REMOTE_CACHE_URL}" ]; then \ + echo "Setting up MinIO for remote cache..." && \ + aws configure set default.s3.signature_version s3v4 && \ + aws configure set aws_access_key_id ${MINIO_ACCESS_KEY} && \ + aws configure set aws_secret_access_key ${MINIO_SECRET_KEY} && \ + aws configure set default.region us-east-1; \ + else \ + echo "No remote cache provided, using local fallback..."; \ + fi + +# Sync cache from MinIO (if remote cache is provided) +RUN if [ -n "${REMOTE_CACHE_URL}" ]; then \ + echo "Downloading cache from MinIO..." && \ + aws --endpoint-url=${MINIO_ENDPOINT_URL} s3 sync s3://vcpkg-cache ${LOCAL_CACHE_DIR}; \ + else \ + echo "No remote cache provided, skipping download."; \ + fi + +# Copy source code +COPY ./engine /app/engine +COPY ./docs/static/openapi/cortex.json /app/docs/static/openapi/cortex.json + +# Build project +# Configure vcpkg binary sources +RUN export VCPKG_BINARY_SOURCES="files,${LOCAL_CACHE_DIR},readwrite;default"; \ + cd engine && make configure-vcpkg && make build CMAKE_EXTRA_FLAGS="-DCORTEX_CPP_VERSION=${CORTEX_CPP_VERSION} -DCMAKE_BUILD_TEST=OFF -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake ${CMAKE_EXTRA_FLAGS}" + + # Upload updated cache to MinIO (if remote cache is provided) +RUN if [ -n "${REMOTE_CACHE_URL}" ]; then \ + echo "Uploading cache to MinIO..." && \ + aws --endpoint-url=${MINIO_ENDPOINT_URL} s3 sync ${LOCAL_CACHE_DIR} s3://vcpkg-cache; \ + else \ + echo "No remote cache provided, skipping upload."; \ + fi + +# Stage 3: Runtime +FROM common as runtime + +WORKDIR /app + +# Copy built binaries from the build stage +COPY --from=build /app/engine/build/cortex /usr/local/bin/cortex +COPY --from=build /app/engine/build/cortex-server /usr/local/bin/cortex-server + +COPY ./docker/download-cortex.llamacpp.sh /tmp/download-cortex.llamacpp.sh +COPY ./docker/entrypoint.sh /usr/local/bin/entrypoint.sh + +# Get the latest version of Cortex Llama +ARG CORTEX_LLAMACPP_VERSION=latest +RUN chmod +x /tmp/download-cortex.llamacpp.sh && /bin/bash /tmp/download-cortex.llamacpp.sh ${CORTEX_LLAMACPP_VERSION} + +# Configure entrypoint +RUN chmod +x /usr/local/bin/entrypoint.sh + +EXPOSE 39281 + +# Healthcheck +HEALTHCHECK --interval=300s --timeout=30s --start-period=10s --retries=3 \ + CMD curl -f http://127.0.0.1:39281/healthz || exit 1 + +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 6461eb15b..6f0493ec2 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -5,10 +5,11 @@ echo "apiServerHost: 0.0.0.0" > /root/.cortexrc echo "enableCors: true" >> /root/.cortexrc +# Install the engine cortex engines install llama-cpp -s /opt/cortex.llamacpp +cortex engines list # Start the cortex server - cortex start # Keep the container running by tailing the log files diff --git a/docs/docs/quickstart.mdx b/docs/docs/quickstart.mdx index 2ebf53c7b..874309ad4 100644 --- a/docs/docs/quickstart.mdx +++ b/docs/docs/quickstart.mdx @@ -80,14 +80,13 @@ All model files are stored in the `~/cortex/models` folder. 
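Before the corrected `curl` invocation below, the same request can be issued from Python; a minimal sketch, assuming the quickstart server is running locally on its default port 39281, the `llama3.1:8b-gguf` model named in this fix has already been pulled, and the third-party `requests` package is available:

```python
import requests  # assumed available; any HTTP client works

payload = {
    "model": "llama3.1:8b-gguf",
    "messages": [{"role": "user", "content": "Hello"}],
    "stream": True,
    "max_tokens": 1,
}
# Stream the response; each non-empty line arrives as a server-sent "data: {...}" chunk.
with requests.post(
    "http://localhost:39281/v1/chat/completions", json=payload, stream=True
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if line:
            print(line.decode())
```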
curl http://localhost:39281/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "", + "model": "llama3.1:8b-gguf", "messages": [ { "role": "user", "content": "Hello" }, ], - "model": "mistral", "stream": true, "max_tokens": 1, "stop": [ diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index 1ac69d78e..da31ab64b 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -1234,9 +1234,9 @@ }, "/v1/engines/{name}": { "get": { - "operationId": "EnginesController_findOne", - "summary": "Get an engine", - "description": "Retrieves an engine instance, providing basic information about the engine.", + "operationId": "EnginesController_listInstalledEngines", + "summary": "List installed engines", + "description": "List installed engines for a particular engine type.", "parameters": [ { "name": "name", @@ -1292,10 +1292,12 @@ } }, "tags": ["Engines"] - }, - "post": { - "summary": "Install an engine", - "description": "Install an engine of a specific type, with optional version and variant", + } + }, + "/v1/engines/{name}/releases": { + "get": { + "summary": "List released engines", + "description": "List released engines of a specific engine type.", "parameters": [ { "name": "name", @@ -1307,32 +1309,156 @@ "default": "llama-cpp" }, "description": "The type of engine" - }, + } + ], + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "draft": { + "type": "boolean", + "example": false + }, + "name": { + "type": "string", + "example": "v0.1.39-20.11.24" + }, + "prerelease": { + "type": "boolean", + "example": true + }, + "published_at": { + "type": "string", + "format": "date-time", + "example": "2024-11-20T17:39:40Z" + }, + "url": { + "type": "string", + "example": "https://api.github.com/repos/janhq/cortex.llamacpp/releases/186479804" + } + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/v1/engines/{name}/releases/latest": { + "get": { + "summary": "Get latest release", + "description": "Return variants for the latest engine release of a specific engine type.", + "parameters": [ { - "name": "version", - "in": "query", - "required": false, + "name": "name", + "in": "path", + "required": true, "schema": { - "type": "string" + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" }, - "description": "The version of the engine to install (optional)" - }, + "description": "The type of engine" + } + ], + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "created_at": { + "type": "string", + "format": "date-time", + "example": "2024-11-15T10:39:39Z" + }, + "download_count": { + "type": "integer", + "example": 76 + }, + "name": { + "type": "string", + "example": "0.1.39-linux-amd64-avx-cuda-11-7" + }, + "size": { + "type": "integer", + "example": 151215080 + } + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/v1/engines/{name}/install": { + "post": { + "summary": "Install an engine", + "description": "Install an engine of a specific type, with optional version and variant. 
If none are provided, the latest version and most suitable variant will be installed.", + "parameters": [ { - "name": "variant", - "in": "query", - "required": false, + "name": "name", + "in": "path", + "required": true, "schema": { - "type": "string" + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" }, - "description": "The variant of the engine to install (optional)" + "description": "The type of engine" } ], + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "The version of the engine to install (optional)", + "example": "v0.1.39" + }, + "variant": { + "type": "string", + "description": "The variant of the engine to install (optional)", + "example": "mac-arm64" + } + } + } + } + } + }, "responses": { "200": { "description": "Successful installation", "content": { "application/json": { - "schema": {} + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine starts installing!" + } + } + } } } } @@ -1341,7 +1467,7 @@ }, "delete": { "summary": "Uninstall an engine", - "description": "Uninstall an engine based on type, version, and variant", + "description": "Uninstall an engine based on engine, version, and variant. If version and variant are not provided, all versions and variants of the engine will be uninstalled.", "parameters": [ { "name": "name", @@ -1353,26 +1479,30 @@ "default": "llama-cpp" }, "description": "The type of engine" - }, - { - "name": "version", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "The version of the engine to uninstall (optional)" - }, - { - "name": "variant", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "The variant of the engine to uninstall (optional)" } ], + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "The version of the engine to uninstall (optional)", + "example": "v0.1.39" + }, + "variant": { + "type": "string", + "description": "The variant of the engine to uninstall (optional)", + "example": "mac-arm64" + } + } + } + } + } + }, "responses": { "200": { "description": "Successful uninstallation", @@ -1381,28 +1511,10 @@ "schema": { "type": "object", "properties": { - "success": { - "type": "boolean", - "description": "Indicates if the uninstallation was successful" - }, "message": { "type": "string", - "description": "Description of the uninstallation action taken" - }, - "uninstalledEngines": { - "type": "array", - "items": { - "type": "object", - "properties": { - "version": { - "type": "string" - }, - "variant": { - "type": "string" - } - } - }, - "description": "List of uninstalled engine versions and variants" + "description": "Engine llama-cpp uninstalled successfully!", + "example": "Engine llama-cpp uninstalled successfully!" 
} } } @@ -1429,6 +1541,44 @@ "tags": ["Engines"] } }, + "/v1/engines/{name}/update": { + "post": { + "summary": "Update engine", + "description": "Updates the specified engine type using the engine variant currently set as default.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The name of the engine to update" + } + ], + "responses": { + "200": { + "description": "Engine updated successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine updated successfully" + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, "/v1/engines/{name}/default": { "get": { "summary": "Get default engine variant", @@ -1603,44 +1753,6 @@ "tags": ["Engines"] } }, - "/v1/engines/{name}/update": { - "post": { - "summary": "Update engine", - "description": "Updates the specified engine type using the engine variant currently set as default.", - "parameters": [ - { - "name": "name", - "in": "path", - "required": true, - "schema": { - "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], - "default": "llama-cpp" - }, - "description": "The name of the engine to update" - } - ], - "responses": { - "200": { - "description": "Engine updated successfully", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "message": { - "type": "string", - "example": "Engine updated successfully" - } - } - } - } - } - } - }, - "tags": ["Engines"] - } - }, "/v1/hardware": { "get": { "summary": "Get hardware information", diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 92e07ec91..b53eb7fdf 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -28,6 +28,10 @@ if(MSVC) $<$:/MTd> #---|-- Statically link the runtime libraries $<$:/MT> #--| ) + + add_compile_options(/utf-8) + add_definitions(-DUNICODE -D_UNICODE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /DUNICODE /D_UNICODE") endif() if(NOT DEFINED CORTEX_VARIANT) @@ -173,10 +177,11 @@ aux_source_directory(models MODEL_SRC) aux_source_directory(cortex-common CORTEX_COMMON) aux_source_directory(config CONFIG_SRC) aux_source_directory(database DB_SRC) +aux_source_directory(migrations MIGR_SRC) target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ) -target_sources(${TARGET_NAME} PRIVATE ${CONFIG_SRC} ${CTL_SRC} ${COMMON_SRC} ${SERVICES_SRC} ${DB_SRC}) +target_sources(${TARGET_NAME} PRIVATE ${CONFIG_SRC} ${CTL_SRC} ${COMMON_SRC} ${SERVICES_SRC} ${DB_SRC} ${MIGR_SRC}) set_target_properties(${TARGET_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR} diff --git a/engine/README.md b/engine/README.md index 192e95612..884e5efae 100644 --- a/engine/README.md +++ b/engine/README.md @@ -173,4 +173,4 @@ Manual build is a process in which the developers build the software manually. 
T ## Star History -[![Star History Chart](https://api.star-history.com/svg?repos=janhq/cortex-cpp&type=Date)](https://star-history.com/#janhq/cortex-cpp&Date) +[![Star History Chart](https://api.star-history.com/svg?repos=janhq/cortex.cpp&type=Date)](https://star-history.com/#janhq/cortex.cpp&Date) \ No newline at end of file diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt index ce6f254ca..c69e7e150 100644 --- a/engine/cli/CMakeLists.txt +++ b/engine/cli/CMakeLists.txt @@ -26,6 +26,10 @@ if(MSVC) $<$:/MTd> #---|-- Statically link the runtime libraries $<$:/MT> #--| ) + + add_compile_options(/utf-8) + add_definitions(-DUNICODE -D_UNICODE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /DUNICODE /D_UNICODE") endif() if(NOT DEFINED CORTEX_VARIANT) diff --git a/engine/cli/commands/engine_get_cmd.cc b/engine/cli/commands/engine_get_cmd.cc index 8699c336b..3fd1fd576 100644 --- a/engine/cli/commands/engine_get_cmd.cc +++ b/engine/cli/commands/engine_get_cmd.cc @@ -2,8 +2,8 @@ #include #include #include +#include "common/engine_servicei.h" #include "server_start_cmd.h" -#include "services/engine_service.h" #include "utils/curl_utils.h" #include "utils/logging_utils.h" #include "utils/url_parser.h" diff --git a/engine/cli/commands/engine_install_cmd.cc b/engine/cli/commands/engine_install_cmd.cc index 1f712d10c..477e38ee2 100644 --- a/engine/cli/commands/engine_install_cmd.cc +++ b/engine/cli/commands/engine_install_cmd.cc @@ -45,28 +45,28 @@ bool EngineInstallCmd::Exec(const std::string& engine, } }); - auto versions_url = url_parser::Url{ + auto releases_url = url_parser::Url{ .protocol = "http", .host = host_ + ":" + std::to_string(port_), - .pathParams = {"v1", "engines", engine, "versions"}, + .pathParams = {"v1", "engines", engine, "releases"}, }; - auto versions_result = curl_utils::SimpleGetJson(versions_url.ToFullPath()); - if (versions_result.has_error()) { - CTL_ERR(versions_result.error()); + auto releases_result = curl_utils::SimpleGetJson(releases_url.ToFullPath()); + if (releases_result.has_error()) { + CTL_ERR(releases_result.error()); return false; } std::vector version_selections; - for (const auto& release_version : versions_result.value()) { + for (const auto& release_version : releases_result.value()) { version_selections.push_back(release_version["name"].asString()); } - auto selected_version = + auto selected_release = cli_selection_utils::PrintSelection(version_selections); - if (selected_version == std::nullopt) { + if (selected_release == std::nullopt) { CTL_ERR("Invalid version selection"); return false; } - std::cout << "Selected version: " << selected_version.value() << std::endl; + std::cout << "Selected version: " << selected_release.value() << std::endl; auto variant_url = url_parser::Url{ .protocol = "http", @@ -76,8 +76,8 @@ bool EngineInstallCmd::Exec(const std::string& engine, "v1", "engines", engine, - "versions", - selected_version.value(), + "releases", + selected_release.value(), }, }; auto variant_result = curl_utils::SimpleGetJson(variant_url.ToFullPath()); @@ -113,23 +113,25 @@ bool EngineInstallCmd::Exec(const std::string& engine, return false; } std::cout << "Selected " << selected_variant.value() << " - " - << selected_version.value() << std::endl; - - auto install_url = - url_parser::Url{.protocol = "http", - .host = host_ + ":" + std::to_string(port_), - .pathParams = - { - "v1", - "engines", - engine, - }, - .queries = { - {"version", selected_version.value()}, - {"variant", selected_variant.value()}, - }}; - - auto response = 
curl_utils::SimplePostJson(install_url.ToFullPath()); + << selected_release.value() << std::endl; + + auto install_url = url_parser::Url{ + .protocol = "http", + .host = host_ + ":" + std::to_string(port_), + .pathParams = + { + "v1", + "engines", + engine, + "install", + }, + }; + Json::Value body; + body["version"] = selected_release.value(); + body["variant"] = selected_variant.value(); + + auto response = curl_utils::SimplePostJson(install_url.ToFullPath(), + body.toStyledString()); if (response.has_error()) { CTL_ERR(response.error()); return false; @@ -163,14 +165,17 @@ bool EngineInstallCmd::Exec(const std::string& engine, "v1", "engines", engine, + "install", }, }; + Json::Value body; if (!version.empty()) { - install_url.queries = {{"version", version}}; + body["version"] = version; } - auto response = curl_utils::SimplePostJson(install_url.ToFullPath()); + auto response = curl_utils::SimplePostJson(install_url.ToFullPath(), + body.toStyledString()); if (response.has_error()) { // TODO: namh refactor later Json::Value root; @@ -183,8 +188,6 @@ bool EngineInstallCmd::Exec(const std::string& engine, return false; } - CLI_LOG("Validating download items, please wait..") - if (!dp_res.get()) return false; diff --git a/engine/cli/commands/engine_uninstall_cmd.cc b/engine/cli/commands/engine_uninstall_cmd.cc index 1ef5580a8..ef9c95af8 100644 --- a/engine/cli/commands/engine_uninstall_cmd.cc +++ b/engine/cli/commands/engine_uninstall_cmd.cc @@ -17,9 +17,10 @@ void EngineUninstallCmd::Exec(const std::string& host, int port, } } - auto url = url_parser::Url{.protocol = "http", - .host = host + ":" + std::to_string(port), - .pathParams = {"v1", "engines", engine}}; + auto url = + url_parser::Url{.protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines", engine, "install"}}; auto result = curl_utils::SimpleDeleteJson(url.ToFullPath()); if (result.has_error()) { diff --git a/engine/cli/commands/engine_use_cmd.cc b/engine/cli/commands/engine_use_cmd.cc index d03f9ddc0..50735739d 100644 --- a/engine/cli/commands/engine_use_cmd.cc +++ b/engine/cli/commands/engine_use_cmd.cc @@ -55,22 +55,29 @@ cpp::result EngineUseCmd::Exec(const std::string& host, return cpp::fail("Invalid variant selection"); } - auto selected_version = cli_selection_utils::PrintSelection( - variant_map[selected_variant.value()]); - if (!selected_variant.has_value()) { + std::optional selected_version = std::nullopt; + if (variant_map[selected_variant.value()].size() == 1) { + selected_version = variant_map[selected_variant.value()][0]; + } else { + selected_version = cli_selection_utils::PrintSelection( + variant_map[selected_variant.value()]); + } + if (!selected_version.has_value()) { CTL_ERR("Invalid version selection"); return cpp::fail("Invalid version selection"); } - auto set_default_engine_variant = - url_parser::Url{.protocol = "http", - .host = host + ":" + std::to_string(port), - .pathParams = {"v1", "engines", engine, "default"}, - .queries = {{"version", selected_version.value()}, - {"variant", selected_variant.value()}}}; + Json::Value body; + body["variant"] = selected_variant.value(); + body["version"] = selected_version.value(); + auto set_default_engine_variant = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines", engine, "default"}, + }; - auto response = - curl_utils::SimplePostJson(set_default_engine_variant.ToFullPath()); + auto response = curl_utils::SimplePostJson( + 
set_default_engine_variant.ToFullPath(), body.toStyledString()); if (response.has_error()) { CTL_ERR(response.error()); return cpp::fail("Failed to set default engine variant"); diff --git a/engine/cli/commands/model_import_cmd.cc b/engine/cli/commands/model_import_cmd.cc index 6abaad2c8..f8cf6a810 100644 --- a/engine/cli/commands/model_import_cmd.cc +++ b/engine/cli/commands/model_import_cmd.cc @@ -1,13 +1,7 @@ #include "model_import_cmd.h" -#include -#include -#include "config/gguf_parser.h" -#include "config/yaml_config.h" -#include "database/models.h" +#include #include "httplib.h" -#include "json/json.h" #include "server_start_cmd.h" -#include "utils/file_manager_utils.h" #include "utils/logging_utils.h" namespace commands { diff --git a/engine/cli/commands/model_pull_cmd.cc b/engine/cli/commands/model_pull_cmd.cc index d769b667a..376943fd1 100644 --- a/engine/cli/commands/model_pull_cmd.cc +++ b/engine/cli/commands/model_pull_cmd.cc @@ -96,8 +96,6 @@ std::optional ModelPullCmd::Exec(const std::string& host, int port, CTL_INF("model: " << model << ", model_id: " << model_id); - // Send request download model to server - CLI_LOG("Validating download items, please wait..") Json::Value json_data; json_data["model"] = model; auto data_str = json_data.toStyledString(); diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc index e039e5329..5ba972463 100644 --- a/engine/cli/commands/server_start_cmd.cc +++ b/engine/cli/commands/server_start_cmd.cc @@ -2,6 +2,7 @@ #include "commands/cortex_upd_cmd.h" #include "utils/cortex_utils.h" #include "utils/file_manager_utils.h" +#include "utils/widechar_conv.h" namespace commands { @@ -57,24 +58,32 @@ bool ServerStartCmd::Exec(const std::string& host, int port, ZeroMemory(&si, sizeof(si)); si.cb = sizeof(si); ZeroMemory(&pi, sizeof(pi)); - std::string params = "--start-server"; - params += " --config_file_path " + get_config_file_path(); - params += " --data_folder_path " + get_data_folder_path(); - params += " --loglevel " + log_level_; - std::string cmds = cortex_utils::GetCurrentPath() + "/" + exe + " " + params; + std::wstring params = L"--start-server"; + params += L" --config_file_path " + + file_manager_utils::GetConfigurationPath().wstring(); + params += L" --data_folder_path " + + file_manager_utils::GetCortexDataPath().wstring(); + params += L" --loglevel " + cortex::wc::Utf8ToWstring(log_level_); + std::wstring exe_w = cortex::wc::Utf8ToWstring(exe); + std::wstring current_path_w = + file_manager_utils::GetExecutableFolderContainerPath().wstring(); + std::wstring wcmds = current_path_w + L"/" + exe_w + L" " + params; + CTL_DBG("wcmds: " << wcmds); + std::vector mutable_cmds(wcmds.begin(), wcmds.end()); + mutable_cmds.push_back(L'\0'); // Create child process if (!CreateProcess( NULL, // No module name (use command line) - const_cast( - cmds.c_str()), // Command line (replace with your actual executable) - NULL, // Process handle not inheritable - NULL, // Thread handle not inheritable - FALSE, // Set handle inheritance to FALSE - 0, // No creation flags - NULL, // Use parent's environment block - NULL, // Use parent's starting directory - &si, // Pointer to STARTUPINFO structure - &pi)) // Pointer to PROCESS_INFORMATION structure + mutable_cmds + .data(), // Command line (replace with your actual executable) + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + FALSE, // Set handle inheritance + 0, // No creation flags + NULL, // Use parent's environment block + NULL, 
// Use parent's starting directory + &si, // Pointer to STARTUPINFO structure + &pi)) // Pointer to PROCESS_INFORMATION structure { std::cout << "Could not start server: " << GetLastError() << std::endl; return false; @@ -115,7 +124,8 @@ bool ServerStartCmd::Exec(const std::string& host, int port, std::string p = cortex_utils::GetCurrentPath() + "/" + exe; execl(p.c_str(), exe.c_str(), "--start-server", "--config_file_path", get_config_file_path().c_str(), "--data_folder_path", - get_data_folder_path().c_str(), "--loglevel", log_level_.c_str(), (char*)0); + get_data_folder_path().c_str(), "--loglevel", log_level_.c_str(), + (char*)0); } else { // Parent process if (!TryConnectToServer(host, port)) { diff --git a/engine/cli/main.cc b/engine/cli/main.cc index 49cdf4be9..a03c5adf0 100644 --- a/engine/cli/main.cc +++ b/engine/cli/main.cc @@ -25,6 +25,9 @@ #error "Unsupported platform!" #endif +#include +#include + void RemoveBinaryTempFileIfExists() { auto temp = file_manager_utils::GetExecutableFolderContainerPath() / "cortex_temp"; @@ -40,11 +43,20 @@ void RemoveBinaryTempFileIfExists() { void SetupLogger(trantor::FileLogger& async_logger, bool verbose) { if (!verbose) { auto config = file_manager_utils::GetCortexConfig(); + std::filesystem::create_directories( +#if defined(_WIN32) + std::filesystem::u8path(config.logFolderPath) / +#else std::filesystem::path(config.logFolderPath) / +#endif std::filesystem::path(cortex_utils::logs_folder)); - async_logger.setFileName(config.logFolderPath + "/" + - cortex_utils::logs_cli_base_name); + + // Do not need to use u8path here because trantor handles itself + async_logger.setFileName( + (std::filesystem::path(config.logFolderPath) / + std::filesystem::path(cortex_utils::logs_cli_base_name)) + .string()); async_logger.setMaxLines(config.maxLogLines); // Keep last 100000 lines async_logger.startLogging(); trantor::Logger::setOutputFunction( @@ -192,8 +204,7 @@ int main(int argc, char* argv[]) { // Check if server exists, if not notify to user to install server auto exe = commands::GetCortexServerBinary(); auto server_binary_path = - std::filesystem::path(cortex_utils::GetCurrentPath()) / - std::filesystem::path(exe); + file_manager_utils::GetExecutableFolderContainerPath() / exe; if (!std::filesystem::exists(server_binary_path)) { std::cout << CORTEX_CPP_VERSION << " requires server binary, to install server, run: " diff --git a/engine/cli/utils/download_progress.cc b/engine/cli/utils/download_progress.cc index e085a660e..37920e516 100644 --- a/engine/cli/utils/download_progress.cc +++ b/engine/cli/utils/download_progress.cc @@ -8,6 +8,10 @@ #include "utils/format_utils.h" #include "utils/json_helper.h" #include "utils/logging_utils.h" +#if !defined(WIN32) && !defined(WIN64) +#include +#include +#endif namespace { std::string Repo2Engine(const std::string& r) { @@ -20,6 +24,20 @@ std::string Repo2Engine(const std::string& r) { } return r; }; + +int GetColumns() { +#if defined(WIN32) || defined(WIN64) + CONSOLE_SCREEN_BUFFER_INFO csbi; + int columns; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi); + columns = csbi.srWindow.Right - csbi.srWindow.Left + 1; + return columns; +#else + struct winsize w; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); + return w.ws_col; +#endif +} } // namespace bool DownloadProgress::Connect(const std::string& host, int port) { if (ws_) { @@ -97,7 +115,7 @@ bool DownloadProgress::Handle( items[i.id] = std::pair( idx, std::make_unique( - indicators::option::BarWidth{50}, + 
indicators::option::BarWidth{GetColumns() / 6}, indicators::option::Start{"["}, indicators::option::Fill{"="}, indicators::option::Lead{">"}, indicators::option::End{"]"}, indicators::option::PrefixText{pad_string(Repo2Engine(i.id))}, diff --git a/engine/common/download_task.h b/engine/common/download_task.h index 94fb11a48..95e736394 100644 --- a/engine/common/download_task.h +++ b/engine/common/download_task.h @@ -2,9 +2,9 @@ #include #include +#include #include #include -#include enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit, Cortex }; @@ -161,4 +161,4 @@ inline DownloadTask GetDownloadTaskFromJson(const Json::Value item_json) { } return task; } -} // namespace common \ No newline at end of file +} // namespace common diff --git a/engine/config/model_config.h b/engine/config/model_config.h index 044fd8dd3..7d4076ee5 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -1,13 +1,12 @@ #pragma once #include -#include -#include #include #include #include #include #include "utils/format_utils.h" + namespace config { struct ModelConfig { std::string name; @@ -173,6 +172,15 @@ struct ModelConfig { tp = json["tp"].asInt(); } } + + std::string ToJsonString() const { + auto obj = ToJson(); + obj["id"] = obj["model"].asString(); + Json::StreamWriterBuilder wbuilder; + wbuilder.settings_["precision"] = 2; + return Json::writeString(wbuilder, obj); + } + Json::Value ToJson() const { Json::Value obj; diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc index 0b3c23d8b..bbe7f430c 100644 --- a/engine/config/yaml_config.cc +++ b/engine/config/yaml_config.cc @@ -18,6 +18,7 @@ void YamlHandler::Reset() { void YamlHandler::ReadYamlFile(const std::string& file_path) { namespace fs = std::filesystem; namespace fmu = file_manager_utils; + try { yaml_node_ = YAML::LoadFile(file_path); // incase of model.yml file, we don't have files yet, create them @@ -41,7 +42,6 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) { yaml_node_["files"] = v; } } catch (const YAML::BadFile& e) { - std::cerr << "Failed to read file: " << e.what() << std::endl; throw; } } diff --git a/engine/config/yaml_config.h b/engine/config/yaml_config.h index 87b9083a1..fe3bf4d02 100644 --- a/engine/config/yaml_config.h +++ b/engine/config/yaml_config.h @@ -4,7 +4,6 @@ #include #include "model_config.h" -#include "yaml-cpp/yaml.h" namespace config { class YamlHandler { diff --git a/engine/controllers/configs.cc b/engine/controllers/configs.cc index 41b08cf45..c2cf7cc2c 100644 --- a/engine/controllers/configs.cc +++ b/engine/controllers/configs.cc @@ -1,4 +1,5 @@ #include "configs.h" +#include "utils/cortex_utils.h" void Configs::GetConfigurations( const HttpRequestPtr& req, @@ -7,13 +8,13 @@ void Configs::GetConfigurations( if (get_config_result.has_error()) { Json::Value error_json; error_json["message"] = get_config_result.error(); - auto resp = drogon::HttpResponse::newHttpJsonResponse(error_json); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(error_json); resp->setStatusCode(drogon::k400BadRequest); callback(resp); return; } - auto resp = drogon::HttpResponse::newHttpJsonResponse( + auto resp = cortex_utils::CreateCortexHttpJsonResponse( get_config_result.value().ToJson()); resp->setStatusCode(drogon::k200OK); callback(resp); @@ -27,7 +28,7 @@ void Configs::UpdateConfigurations( if (json_body == nullptr) { Json::Value error_json; error_json["message"] = "Configuration must be provided via JSON body"; - auto resp = 
drogon::HttpResponse::newHttpJsonResponse(error_json); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(error_json); resp->setStatusCode(drogon::k400BadRequest); callback(resp); return; @@ -37,7 +38,7 @@ void Configs::UpdateConfigurations( if (update_config_result.has_error()) { Json::Value error_json; error_json["message"] = update_config_result.error(); - auto resp = drogon::HttpResponse::newHttpJsonResponse(error_json); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(error_json); resp->setStatusCode(drogon::k400BadRequest); callback(resp); return; @@ -46,7 +47,7 @@ void Configs::UpdateConfigurations( Json::Value root; root["message"] = "Configuration updated successfully"; root["config"] = update_config_result.value().ToJson(); - auto resp = drogon::HttpResponse::newHttpJsonResponse(root); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(root); resp->setStatusCode(drogon::k200OK); callback(resp); return; diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index a75bd1f9b..9e110bd66 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -47,11 +47,24 @@ void Engines::ListEngine( void Engines::UninstallEngine( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, const std::optional version, - const std::optional variant) { + const std::string& engine) { + std::optional norm_variant = std::nullopt; + std::optional norm_version = std::nullopt; + if (req->getJsonObject() != nullptr) { + auto variant = (*(req->getJsonObject())).get("variant", "").asString(); + auto version = + (*(req->getJsonObject())).get("version", "latest").asString(); - auto result = - engine_service_->UninstallEngineVariant(engine, version, variant); + if (!variant.empty()) { + norm_variant = variant; + } + if (!version.empty()) { + norm_version = version; + } + } + + auto result = engine_service_->UninstallEngineVariant(engine, norm_version, + norm_variant); Json::Value ret; if (result.has_error()) { @@ -69,7 +82,7 @@ void Engines::UninstallEngine( } } -void Engines::GetEngineVersions( +void Engines::GetEngineReleases( const HttpRequestPtr& req, std::function&& callback, const std::string& engine) const { @@ -134,12 +147,23 @@ void Engines::GetEngineVariants( void Engines::InstallEngine( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, const std::optional version, - const std::optional variant_name) { - auto normalized_version = version.value_or("latest"); + const std::string& engine) { + std::optional norm_variant = std::nullopt; + std::string norm_version{"latest"}; + + if (req->getJsonObject() != nullptr) { + auto variant = (*(req->getJsonObject())).get("variant", "").asString(); + auto version = + (*(req->getJsonObject())).get("version", "latest").asString(); - auto result = engine_service_->InstallEngineAsyncV2( - engine, normalized_version, variant_name); + if (!variant.empty()) { + norm_variant = variant; + } + norm_version = version; + } + + auto result = + engine_service_->InstallEngineAsync(engine, norm_version, norm_variant); if (result.has_error()) { Json::Value res; res["message"] = result.error(); @@ -218,8 +242,36 @@ void Engines::GetLatestEngineVersion( void Engines::SetDefaultEngineVariant( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, const std::string& version, - const std::string& variant) { + const std::string& engine) { + auto json_obj = req->getJsonObject(); + if (json_obj == nullptr) { + Json::Value res; + res["message"] = 
"Request body is required"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + + auto variant = (*(req->getJsonObject())).get("variant", "").asString(); + if (variant.empty()) { + Json::Value ret; + ret["message"] = "Variant is required"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + auto version = (*(req->getJsonObject())).get("version", "").asString(); + if (version.empty()) { + Json::Value ret; + ret["message"] = "Version is required"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + auto result = engine_service_->SetDefaultEngineVariant(engine, version, variant); if (result.has_error()) { diff --git a/engine/controllers/engines.h b/engine/controllers/engines.h index dc9ef5a54..b0a92b6c3 100644 --- a/engine/controllers/engines.h +++ b/engine/controllers/engines.h @@ -12,57 +12,72 @@ class Engines : public drogon::HttpController { public: METHOD_LIST_BEGIN - METHOD_ADD(Engines::GetInstalledEngineVariants, "/{1}", Get); - METHOD_ADD(Engines::InstallEngine, "/{1}?version={2}&variant={3}", Options, - Post); - METHOD_ADD(Engines::UninstallEngine, "/{1}?version={2}&variant={3}", Options, - Delete); - METHOD_ADD(Engines::SetDefaultEngineVariant, - "/{1}/default?version={2}&variant={3}", Options, Post); - METHOD_ADD(Engines::GetDefaultEngineVariant, "/{1}/default", Get); + // install engine + METHOD_ADD(Engines::InstallEngine, "/{1}/install", Options, Post); + ADD_METHOD_TO(Engines::InstallEngine, "/v1/engines/{1}/install", Options, + Post); - METHOD_ADD(Engines::LoadEngine, "/{1}/load", Options, Post); - METHOD_ADD(Engines::UnloadEngine, "/{1}/load", Options, Delete); - METHOD_ADD(Engines::UpdateEngine, "/{1}/update", Options, Post); - METHOD_ADD(Engines::ListEngine, "", Get); + // uninstall engine + METHOD_ADD(Engines::UninstallEngine, "/{1}/install", Options, Delete); + ADD_METHOD_TO(Engines::UninstallEngine, "/v1/engines/{1}/install", Options, + Delete); - METHOD_ADD(Engines::GetEngineVersions, "/{1}/versions", Get); - METHOD_ADD(Engines::GetEngineVariants, "/{1}/versions/{2}", Get); - METHOD_ADD(Engines::GetLatestEngineVersion, "/{1}/latest", Get); + // set default engine + METHOD_ADD(Engines::SetDefaultEngineVariant, "/{1}/default", Options, Post); + ADD_METHOD_TO(Engines::SetDefaultEngineVariant, "/v1/engines/{1}/default", + Options, Post); - ADD_METHOD_TO(Engines::GetInstalledEngineVariants, "/v1/engines/{1}", Get); - ADD_METHOD_TO(Engines::InstallEngine, - "/v1/engines/{1}?version={2}&variant={3}", Options, Post); - ADD_METHOD_TO(Engines::UninstallEngine, - "/v1/engines/{1}?version={2}&variant={3}", Options, Delete); - ADD_METHOD_TO(Engines::SetDefaultEngineVariant, - "/v1/engines/{1}/default?version={2}&variant={3}", Options, - Post); + // get default engine + METHOD_ADD(Engines::GetDefaultEngineVariant, "/{1}/default", Get); ADD_METHOD_TO(Engines::GetDefaultEngineVariant, "/v1/engines/{1}/default", Get); + // update engine + METHOD_ADD(Engines::UpdateEngine, "/{1}/update", Options, Post); + ADD_METHOD_TO(Engines::UpdateEngine, "/v1/engines/{1}/update", Options, Post); + + // load engine + METHOD_ADD(Engines::LoadEngine, "/{1}/load", Options, Post); ADD_METHOD_TO(Engines::LoadEngine, "/v1/engines/{1}/load", Options, Post); + + // unload engine + METHOD_ADD(Engines::UnloadEngine, "/{1}/load", Options, Delete); 
ADD_METHOD_TO(Engines::UnloadEngine, "/v1/engines/{1}/load", Options, Delete); - ADD_METHOD_TO(Engines::UpdateEngine, "/v1/engines/{1}/update", Options, Post); - ADD_METHOD_TO(Engines::GetEngineVersions, "/v1/engines/{1}/versions", Get); - ADD_METHOD_TO(Engines::GetEngineVariants, "/v1/engines/{1}/versions/{2}", - Get); + + METHOD_ADD(Engines::GetInstalledEngineVariants, "/{1}", Get); + ADD_METHOD_TO(Engines::GetInstalledEngineVariants, "/v1/engines/{1}", Get); + + METHOD_ADD(Engines::ListEngine, "", Get); ADD_METHOD_TO(Engines::ListEngine, "/v1/engines", Get); + + METHOD_ADD(Engines::GetEngineReleases, "/{1}/releases", Get); + ADD_METHOD_TO(Engines::GetEngineReleases, "/v1/engines/{1}/releases", Get); + + METHOD_ADD(Engines::GetEngineVariants, "/{1}/releases/{2}", Get); + ADD_METHOD_TO(Engines::GetEngineVariants, "/v1/engines/{1}/releases/{2}", + Get); + + METHOD_ADD(Engines::GetLatestEngineVersion, "/{1}/releases/latest", Get); + ADD_METHOD_TO(Engines::GetLatestEngineVersion, + "/v1/engines/{1}/releases/latest", Get); + METHOD_LIST_END explicit Engines(std::shared_ptr engine_service) : engine_service_{engine_service} {} - void ListEngine(const HttpRequestPtr& req, - std::function&& callback) const; + void InstallEngine(const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine); void UninstallEngine(const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, - const std::optional version, - const std::optional variant); + const std::string& engine); + + void ListEngine(const HttpRequestPtr& req, + std::function&& callback) const; - void GetEngineVersions(const HttpRequestPtr& req, + void GetEngineReleases(const HttpRequestPtr& req, std::function&& callback, const std::string& engine) const; @@ -71,12 +86,6 @@ class Engines : public drogon::HttpController { const std::string& engine, const std::string& version) const; - void InstallEngine(const HttpRequestPtr& req, - std::function&& callback, - const std::string& engine, - const std::optional version, - const std::optional variant_name); - void GetInstalledEngineVariants( const HttpRequestPtr& req, std::function&& callback, @@ -94,8 +103,7 @@ class Engines : public drogon::HttpController { void SetDefaultEngineVariant( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, const std::string& version, - const std::string& variant); + const std::string& engine); void GetDefaultEngineVariant( const HttpRequestPtr& req, diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index 9e4ba1e9f..c51bb3b77 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -153,7 +153,7 @@ void Models::ListModel( Json::Value ret; ret["object"] = "list"; Json::Value data(Json::arrayValue); - + model_service_->ForceIndexingModelList(); // Iterate through directory cortex::db::Models modellist_handler; @@ -224,13 +224,9 @@ void Models::GetModel(const HttpRequestPtr& req, .string()); auto model_config = yaml_handler.GetModelConfig(); - ret = model_config.ToJson(); - - ret["id"] = model_config.model; - ret["object"] = "model"; - ret["result"] = "OK"; - auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); - resp->setStatusCode(k200OK); + auto ret = model_config.ToJsonString(); + auto resp = cortex_utils::CreateCortexHttpTextAsJsonResponse(ret); + resp->setStatusCode(drogon::k200OK); callback(resp); } catch (const std::exception& e) { std::string message = diff --git a/engine/controllers/swagger.cc b/engine/controllers/swagger.cc index 
d605daa0b..96a6c3837 100644 --- a/engine/controllers/swagger.cc +++ b/engine/controllers/swagger.cc @@ -1,5 +1,6 @@ #include "swagger.h" #include "cortex_openapi.h" +#include "utils/cortex_utils.h" constexpr auto ScalarUi = R"( @@ -31,7 +32,7 @@ Json::Value SwaggerController::generateOpenAPISpec() { void SwaggerController::serveSwaggerUI( const drogon::HttpRequestPtr& req, std::function&& callback) const { - auto resp = drogon::HttpResponse::newHttpResponse(); + auto resp = cortex_utils::CreateCortexHttpResponse(); resp->setBody(ScalarUi); resp->setContentTypeCode(drogon::CT_TEXT_HTML); callback(resp); @@ -41,6 +42,6 @@ void SwaggerController::serveOpenAPISpec( const drogon::HttpRequestPtr& req, std::function&& callback) const { Json::Value spec = generateOpenAPISpec(); - auto resp = drogon::HttpResponse::newHttpJsonResponse(spec); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(spec); callback(resp); } diff --git a/engine/database/database.h b/engine/database/database.h index 27c75e923..dbe58cc4b 100644 --- a/engine/database/database.h +++ b/engine/database/database.h @@ -20,7 +20,7 @@ class Database { private: Database() - : db_(file_manager_utils::GetCortexDataPath().string() + "/cortex.db", + : db_(file_manager_utils::GetCortexDataPath() / "cortex.db", SQLite::OPEN_READWRITE | SQLite::OPEN_CREATE) {} SQLite::Database db_; }; diff --git a/engine/database/hardwares.cc b/engine/database/hardware.cc similarity index 80% rename from engine/database/hardwares.cc rename to engine/database/hardware.cc index c23aec0b7..ee68749d5 100644 --- a/engine/database/hardwares.cc +++ b/engine/database/hardware.cc @@ -1,27 +1,13 @@ -#include "hardwares.h" +#include "hardware.h" #include "database.h" #include "utils/scope_exit.h" namespace cortex::db { Hardwares::Hardwares() : db_(cortex::db::Database::GetInstance().db()) { - db_.exec( - "CREATE TABLE IF NOT EXISTS hardwares (" - "uuid TEXT PRIMARY KEY," - "type TEXT," - "hardware_id INTEGER," - "software_id INTEGER," - "activated INTEGER);"); } Hardwares::Hardwares(SQLite::Database& db) : db_(db) { - db_.exec( - "CREATE TABLE IF NOT EXISTS hardwares (" - "uuid TEXT PRIMARY KEY," - "type TEXT," - "hardware_id INTEGER," - "software_id INTEGER," - "activated INTEGER);"); } Hardwares::~Hardwares() {} @@ -35,7 +21,7 @@ Hardwares::LoadHardwareList() const { SQLite::Statement query( db_, "SELECT uuid, type, " - "hardware_id, software_id, activated FROM hardwares"); + "hardware_id, software_id, activated FROM hardware"); while (query.executeStep()) { HardwareEntry entry; @@ -57,7 +43,7 @@ cpp::result Hardwares::AddHardwareEntry( try { SQLite::Statement insert( db_, - "INSERT INTO hardwares (uuid, type, " + "INSERT INTO hardware (uuid, type, " "hardware_id, software_id, activated) VALUES (?, ?, " "?, ?, ?)"); insert.bind(1, new_entry.uuid); @@ -77,7 +63,7 @@ cpp::result Hardwares::UpdateHardwareEntry( const std::string& id, const HardwareEntry& updated_entry) { try { SQLite::Statement upd(db_, - "UPDATE hardwares " + "UPDATE hardware " "SET hardware_id = ?, software_id = ?, activated = ? 
" "WHERE uuid = ?"); upd.bind(1, updated_entry.hardware_id); @@ -97,7 +83,7 @@ cpp::result Hardwares::UpdateHardwareEntry( cpp::result Hardwares::DeleteHardwareEntry( const std::string& id) { try { - SQLite::Statement del(db_, "DELETE from hardwares WHERE uuid = ?"); + SQLite::Statement del(db_, "DELETE from hardware WHERE uuid = ?"); del.bind(1, id); if (del.exec() == 1) { CTL_INF("Deleted: " << id); diff --git a/engine/database/hardwares.h b/engine/database/hardware.h similarity index 100% rename from engine/database/hardwares.h rename to engine/database/hardware.h diff --git a/engine/database/models.cc b/engine/database/models.cc index d0bee405c..3e81fbab2 100644 --- a/engine/database/models.cc +++ b/engine/database/models.cc @@ -1,6 +1,5 @@ #include "models.h" #include -#include #include #include "database.h" #include "utils/result.hpp" @@ -8,25 +7,9 @@ namespace cortex::db { -Models::Models() : db_(cortex::db::Database::GetInstance().db()) { - db_.exec( - "CREATE TABLE IF NOT EXISTS models (" - "model_id TEXT PRIMARY KEY," - "author_repo_id TEXT," - "branch_name TEXT," - "path_to_model_yaml TEXT," - "model_alias TEXT);"); -} +Models::Models() : db_(cortex::db::Database::GetInstance().db()) {} -Models::Models(SQLite::Database& db) : db_(db) { - db_.exec( - "CREATE TABLE IF NOT EXISTS models (" - "model_id TEXT PRIMARY KEY," - "author_repo_id TEXT," - "branch_name TEXT," - "path_to_model_yaml TEXT," - "model_alias TEXT UNIQUE);"); -} +Models::Models(SQLite::Database& db) : db_(db) {} Models::~Models() {} @@ -262,6 +245,11 @@ cpp::result Models::UpdateModelAlias( cpp::result Models::DeleteModelEntry( const std::string& identifier) { try { + // delete only if its there + if (!HasModel(identifier)) { + return true; + } + SQLite::Statement del( db_, "DELETE from models WHERE model_id = ? 
OR model_alias = ?"); del.bind(1, identifier); diff --git a/engine/e2e-test/cortex-llamacpp-e2e-nightly.py b/engine/e2e-test/cortex-llamacpp-e2e-nightly.py new file mode 100644 index 000000000..9be34519a --- /dev/null +++ b/engine/e2e-test/cortex-llamacpp-e2e-nightly.py @@ -0,0 +1,33 @@ +import pytest +import sys + +### e2e tests are expensive, have to keep engines tests in order +from test_api_engine_list import TestApiEngineList +from test_api_engine_install_nightly import TestApiEngineInstall +from test_api_engine_get import TestApiEngineGet + +### models, keeps in order, note that we only uninstall engine after finishing all models test +from test_api_model_pull_direct_url import TestApiModelPullDirectUrl +from test_api_model_start import TestApiModelStart +from test_api_model_stop import TestApiModelStop +from test_api_model_get import TestApiModelGet +from test_api_model_list import TestApiModelList +from test_api_model_update import TestApiModelUpdate +from test_api_model_delete import TestApiModelDelete +from test_api_model_import import TestApiModelImport +from test_api_engine_uninstall import TestApiEngineUninstall + +### +from test_cli_engine_get import TestCliEngineGet +from test_cli_engine_install_nightly import TestCliEngineInstall +from test_cli_engine_list import TestCliEngineList +from test_cli_model_delete import TestCliModelDelete +from test_cli_model_pull_direct_url import TestCliModelPullDirectUrl +from test_cli_server_start import TestCliServerStart +from test_cortex_update import TestCortexUpdate +from test_create_log_folder import TestCreateLogFolder +from test_cli_model_import import TestCliModelImport +from test_cli_engine_uninstall import TestCliEngineUninstall + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-v"])) diff --git a/engine/e2e-test/test_api_engine_install.py b/engine/e2e-test/test_api_engine_install.py index b0fbb6c9c..aabe0138d 100644 --- a/engine/e2e-test/test_api_engine_install.py +++ b/engine/e2e-test/test_api_engine_install.py @@ -18,17 +18,19 @@ def setup_and_teardown(self): stop_server() def test_engines_install_llamacpp_should_be_successful(self): - response = requests.post("http://localhost:3928/v1/engines/llama-cpp") + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") assert response.status_code == 200 def test_engines_install_llamacpp_specific_version_and_variant(self): + data = {"version": "v0.1.35-27.10.24", "variant": "linux-amd64-avx-cuda-11-7"} response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35-27.10.24&variant=linux-amd64-avx-cuda-11-7" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert response.status_code == 200 def test_engines_install_llamacpp_specific_version_and_null_variant(self): + data = {"version": "v0.1.35-27.10.24"} response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35-27.10.24" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert response.status_code == 200 diff --git a/engine/e2e-test/test_api_engine_install_nightly.py b/engine/e2e-test/test_api_engine_install_nightly.py new file mode 100644 index 000000000..de4914c28 --- /dev/null +++ b/engine/e2e-test/test_api_engine_install_nightly.py @@ -0,0 +1,37 @@ +import pytest +import requests +from test_runner import start_server, stop_server, get_latest_pre_release_tag + +latest_pre_release_tag = get_latest_pre_release_tag("janhq", "cortex.llamacpp") + +class TestApiEngineInstall: + + @pytest.fixture(autouse=True) + 
def setup_and_teardown(self): + # Setup + success = start_server() + if not success: + raise Exception("Failed to start server") + + yield + + # Teardown + stop_server() + + def test_engines_install_llamacpp_should_be_successful(self): + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") + assert response.status_code == 200 + + def test_engines_install_llamacpp_specific_version_and_variant(self): + data = {"version": latest_pre_release_tag, "variant": "linux-amd64-avx-cuda-11-7"} + response = requests.post( + "http://localhost:3928/v1/engines/llama-cpp/install", json=data + ) + assert response.status_code == 200 + + def test_engines_install_llamacpp_specific_version_and_null_variant(self): + data = {"version": latest_pre_release_tag} + response = requests.post( + "http://localhost:3928/v1/engines/llama-cpp/install", json=data + ) + assert response.status_code == 200 diff --git a/engine/e2e-test/test_api_engine_list.py b/engine/e2e-test/test_api_engine_list.py index 974fbbf8e..71b9ea8b4 100644 --- a/engine/e2e-test/test_api_engine_list.py +++ b/engine/e2e-test/test_api_engine_list.py @@ -8,6 +8,9 @@ class TestApiEngineList: @pytest.fixture(autouse=True) def setup_and_teardown(self): # Setup + # Not sure why but on macOS amd, the first start server timeouts with CI + start_server() + stop_server() success = start_server() if not success: raise Exception("Failed to start server") diff --git a/engine/e2e-test/test_api_engine_uninstall.py b/engine/e2e-test/test_api_engine_uninstall.py index 491bc2d27..2a491d07a 100644 --- a/engine/e2e-test/test_api_engine_uninstall.py +++ b/engine/e2e-test/test_api_engine_uninstall.py @@ -1,6 +1,12 @@ import pytest +import time import requests -from test_runner import start_server, stop_server +from test_runner import ( + run, + start_server, + stop_server, + wait_for_websocket_download_success_event, +) class TestApiEngineUninstall: @@ -16,49 +22,58 @@ def setup_and_teardown(self): # Teardown stop_server() - - def test_engines_uninstall_llamacpp_should_be_successful(self): - # install first - requests.post("http://localhost:3928/v1/engines/llama-cpp") - - response = requests.delete("http://localhost:3928/v1/engines/llama-cpp") + + @pytest.mark.asyncio + async def test_engines_uninstall_llamacpp_should_be_successful(self): + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") + assert response.status_code == 200 + await wait_for_websocket_download_success_event(timeout=None) + time.sleep(30) + + response = requests.delete("http://localhost:3928/v1/engines/llama-cpp/install") assert response.status_code == 200 - def test_engines_uninstall_llamacpp_with_only_version_should_be_failed(self): + @pytest.mark.asyncio + async def test_engines_uninstall_llamacpp_with_only_version_should_be_failed(self): # install first + data = {"variant": "mac-arm64"} install_response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35" + "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data ) + await wait_for_websocket_download_success_event(timeout=120) assert install_response.status_code == 200 + data = {"version": "v0.1.35"} response = requests.delete( - "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert response.status_code == 400 assert response.json()["message"] == "No variant provided" - def test_engines_uninstall_llamacpp_with_variant_should_be_successful(self): + @pytest.mark.asyncio + async def 
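The uninstall tests are now async and wait for a `DownloadSuccess` websocket event before deleting the engine, since the install download completes in the background. The real helper lives in `engine/e2e-test/test_runner.py`; the sketch below shows the pattern it implements, with the events URL and payload field names as assumptions:

```python
import asyncio
import json

import websockets  # third-party: pip install websockets

async def wait_for_download_success(url: str = "ws://127.0.0.1:3928/events",
                                    timeout: float | None = 120):
    """Block until the server publishes a DownloadSuccess event, or time out."""
    async def receive_until_success():
        async with websockets.connect(url) as ws:
            async for message in ws:  # each message is one serialized event
                event = json.loads(message)
                if event.get("type") == "DownloadSuccess":
                    return event

    try:
        return await asyncio.wait_for(receive_until_success(), timeout=timeout)
    except asyncio.TimeoutError:
        raise TimeoutError("Timeout waiting for DownloadSuccess event")
```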
test_engines_uninstall_llamacpp_with_variant_should_be_successful(self): # install first + data = {"variant": "mac-arm64"} install_response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64" + "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data ) + await wait_for_websocket_download_success_event(timeout=120) assert install_response.status_code == 200 - response = requests.delete( - "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64" - ) + response = requests.delete("http://127.0.0.1:3928/v1/engines/llama-cpp/install") assert response.status_code == 200 def test_engines_uninstall_llamacpp_with_specific_variant_and_version_should_be_successful( self, ): + data = {"variant": "mac-arm64", "version": "v0.1.35"} # install first install_response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64&version=v0.1.35" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert install_response.status_code == 200 response = requests.delete( - "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64&version=v0.1.35" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert response.status_code == 200 diff --git a/engine/e2e-test/test_api_model_start.py b/engine/e2e-test/test_api_model_start.py index d6a98a78b..b3e33d113 100644 --- a/engine/e2e-test/test_api_model_start.py +++ b/engine/e2e-test/test_api_model_start.py @@ -1,8 +1,10 @@ import pytest +import time import requests from test_runner import run, start_server, stop_server - - +from test_runner import ( + wait_for_websocket_download_success_event +) class TestApiModelStart: @pytest.fixture(autouse=True) @@ -12,20 +14,28 @@ def setup_and_teardown(self): success = start_server() if not success: raise Exception("Failed to start server") - run("Install engine", ["engines", "install", "llama-cpp"], 5 * 60) run("Delete model", ["models", "delete", "tinyllama:gguf"]) - run( - "Pull model", - ["pull", "tinyllama:gguf"], - timeout=None, - ) yield # Teardown stop_server() - - def test_models_start_should_be_successful(self): + + @pytest.mark.asyncio + async def test_models_start_should_be_successful(self): + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") + assert response.status_code == 200 + await wait_for_websocket_download_success_event(timeout=None) + # TODO(sang) need to fix for cuda download + time.sleep(30) + + json_body = { + "model": "tinyllama:gguf" + } + response = requests.post("http://localhost:3928/v1/models/pull", json=json_body) + assert response.status_code == 200, f"Failed to pull model: tinyllama:gguf" + await wait_for_websocket_download_success_event(timeout=None) + json_body = {"model": "tinyllama:gguf"} response = requests.post( "http://localhost:3928/v1/models/start", json=json_body diff --git a/engine/e2e-test/test_api_model_stop.py b/engine/e2e-test/test_api_model_stop.py index dc3b6b77b..4fc7a55e2 100644 --- a/engine/e2e-test/test_api_model_stop.py +++ b/engine/e2e-test/test_api_model_stop.py @@ -1,7 +1,10 @@ import pytest +import time import requests from test_runner import run, start_server, stop_server - +from test_runner import ( + wait_for_websocket_download_success_event +) class TestApiModelStop: @@ -13,14 +16,19 @@ def setup_and_teardown(self): if not success: raise Exception("Failed to start server") - run("Install engine", ["engines", "install", "llama-cpp"], 5 * 60) yield run("Uninstall engine", ["engines", "uninstall", "llama-cpp"]) # Teardown stop_server() - def 
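`test_api_model_start` no longer shells out to the CLI for setup; it drives the whole flow over HTTP. Condensed, the sequence is the following sketch (the event waits and the CUDA-settling sleep come from the test above):

```python
import requests

BASE = "http://localhost:3928/v1"

# 1. Install the engine; the download is asynchronous, so the real test then
#    awaits the DownloadSuccess websocket event and sleeps for CUDA deps.
assert requests.post(f"{BASE}/engines/llama-cpp/install").status_code == 200

# 2. Pull the model, again followed by an event wait in the real test.
assert requests.post(f"{BASE}/models/pull",
                     json={"model": "tinyllama:gguf"}).status_code == 200

# 3. Only then is the model started.
response = requests.post(f"{BASE}/models/start", json={"model": "tinyllama:gguf"})
assert response.status_code == 200
```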
test_models_stop_should_be_successful(self): + @pytest.mark.asyncio + async def test_models_stop_should_be_successful(self): + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") + assert response.status_code == 200 + await wait_for_websocket_download_success_event(timeout=None) + time.sleep(30) + json_body = {"model": "tinyllama:gguf"} response = requests.post( "http://localhost:3928/v1/models/start", json=json_body diff --git a/engine/e2e-test/test_cli_engine_install.py b/engine/e2e-test/test_cli_engine_install.py index 380334222..a998f3183 100644 --- a/engine/e2e-test/test_cli_engine_install.py +++ b/engine/e2e-test/test_cli_engine_install.py @@ -19,6 +19,7 @@ def setup_and_teardown(self): # Teardown stop_server() + @pytest.mark.skipif(platform.system() == "Windows", reason="Progress bar log issue on Windows") def test_engines_install_llamacpp_should_be_successfully(self): exit_code, output, error = run( "Install Engine", @@ -46,6 +47,7 @@ def test_engines_install_onnx_on_tensorrt_should_be_failed(self): assert "is not supported on" in output, "Should display error message" assert exit_code == 0, f"Install engine failed with error: {error}" + @pytest.mark.skipif(platform.system() == "Windows", reason="Progress bar log issue on Windows") def test_engines_install_pre_release_llamacpp(self): engine_version = "v0.1.29" exit_code, output, error = run( @@ -67,6 +69,7 @@ def test_engines_install_pre_release_llamacpp(self): assert is_engine_version_exist, f"Engine version {engine_version} is not found" assert exit_code == 0, f"Install engine failed with error: {error}" + @pytest.mark.skipif(platform.system() == "Windows", reason="Progress bar log issue on Windows") def test_engines_should_fallback_to_download_llamacpp_engine_if_not_exists(self): exit_code, output, error = run( "Install Engine", diff --git a/engine/e2e-test/test_cli_engine_install_nightly.py b/engine/e2e-test/test_cli_engine_install_nightly.py new file mode 100644 index 000000000..8c66c284c --- /dev/null +++ b/engine/e2e-test/test_cli_engine_install_nightly.py @@ -0,0 +1,89 @@ +import platform +import tempfile + +import pytest +import requests +from test_runner import run, start_server, stop_server, get_latest_pre_release_tag + +latest_pre_release_tag = get_latest_pre_release_tag("janhq", "cortex.llamacpp") + +class TestCliEngineInstall: + def setup_and_teardown(self): + # Setup + stop_server() + success = start_server() + if not success: + raise Exception("Failed to start server") + + yield + + # Teardown + stop_server() + + def test_engines_install_llamacpp_should_be_successfully(self): + exit_code, output, error = run( + "Install Engine", + ["engines", "install", "llama-cpp"], + timeout=None, + capture=False, + ) + response = requests.get("http://127.0.0.1:3928/v1/engines/llama-cpp") + assert len(response.json()) > 0 + assert exit_code == 0, f"Install engine failed with error: {error}" + + @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test") + def test_engines_install_onnx_on_macos_should_be_failed(self): + exit_code, output, error = run( + "Install Engine", ["engines", "install", "onnxruntime"] + ) + assert "is not supported on" in output, "Should display error message" + assert exit_code == 0, f"Install engine failed with error: {error}" + + @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test") + def test_engines_install_onnx_on_tensorrt_should_be_failed(self): + exit_code, output, error = run( + "Install Engine", ["engines", "install", 
"tensorrt-llm"] + ) + assert "is not supported on" in output, "Should display error message" + assert exit_code == 0, f"Install engine failed with error: {error}" + + def test_engines_should_fallback_to_download_llamacpp_engine_if_not_exists(self): + exit_code, output, error = run( + "Install Engine", + ["engines", "install", "llama-cpp", "-s", tempfile.gettempdir()], + timeout=None, + ) + # response = requests.get("http://127.0.0.1:3928/v1/engines/llama-cpp") + # assert len(response.json()) > 0 + assert "downloaded successfully" in output + assert exit_code == 0, f"Install engine failed with error: {error}" + + def test_engines_should_not_perform_with_dummy_path(self): + exit_code, output, error = run( + "Install Engine", + ["engines", "install", "llama-cpp", "-s", "abcpod"], + timeout=None, + ) + assert "Folder does not exist" in output, "Should display error" + assert exit_code == 0, f"Install engine failed with error: {error}" + + def test_engines_install_pre_release_llamacpp(self): + engine_version = latest_pre_release_tag + exit_code, output, error = run( + "Install Engine", + ["engines", "install", "llama-cpp", "-v", engine_version], + timeout=None, + capture=False, + ) + response = requests.get("http://127.0.0.1:3928/v1/engines/llama-cpp") + assert len(response.json()) > 0 + is_engine_version_exist = False + for item in response.json(): + # Check if 'version' key exists and matches target + if "version" in item and item["version"] == engine_version: + is_engine_version_exist = True + break + + # loop through all the installed response, expect we find + assert is_engine_version_exist, f"Engine version {engine_version} is not found" + assert exit_code == 0, f"Install engine failed with error: {error}" diff --git a/engine/e2e-test/test_cli_engine_uninstall.py b/engine/e2e-test/test_cli_engine_uninstall.py index 0ca151d48..fcc5f5c73 100644 --- a/engine/e2e-test/test_cli_engine_uninstall.py +++ b/engine/e2e-test/test_cli_engine_uninstall.py @@ -24,7 +24,7 @@ def setup_and_teardown(self): @pytest.mark.asyncio async def test_engines_uninstall_llamacpp_should_be_successfully(self): - requests.post("http://127.0.0.1:3928/v1/engines/llama-cpp") + requests.post("http://127.0.0.1:3928/v1/engines/llama-cpp/install") await wait_for_websocket_download_success_event(timeout=None) exit_code, output, error = run( "Uninstall engine", ["engines", "uninstall", "llama-cpp"] diff --git a/engine/e2e-test/test_cli_model_delete.py b/engine/e2e-test/test_cli_model_delete.py index f7ab53058..d0ba43ec1 100644 --- a/engine/e2e-test/test_cli_model_delete.py +++ b/engine/e2e-test/test_cli_model_delete.py @@ -1,6 +1,10 @@ import pytest +import requests from test_runner import popen, run from test_runner import start_server, stop_server +from test_runner import ( + wait_for_websocket_download_success_event +) class TestCliModelDelete: @@ -11,10 +15,6 @@ def setup_and_teardown(self): if not success: raise Exception("Failed to start server") - # Pull model - - run("Pull model", ["pull", "tinyllama:gguf"], timeout=None,) - yield # Teardown @@ -22,7 +22,15 @@ def setup_and_teardown(self): run("Delete model", ["models", "delete", "tinyllama:gguf"]) stop_server() - def test_models_delete_should_be_successful(self): + @pytest.mark.asyncio + async def test_models_delete_should_be_successful(self): + json_body = { + "model": "tinyllama:gguf" + } + response = requests.post("http://localhost:3928/v1/models/pull", json=json_body) + assert response.status_code == 200, f"Failed to pull model: tinyllama:gguf" + await 
wait_for_websocket_download_success_event(timeout=None) + exit_code, output, error = run( "Delete model", ["models", "delete", "tinyllama:gguf"] ) diff --git a/engine/e2e-test/test_runner.py b/engine/e2e-test/test_runner.py index e93e152e0..843e669b4 100644 --- a/engine/e2e-test/test_runner.py +++ b/engine/e2e-test/test_runner.py @@ -6,6 +6,7 @@ import subprocess import threading import time +import requests from typing import List import websockets @@ -187,3 +188,36 @@ async def receive_until_success(): except asyncio.TimeoutError: raise TimeoutError("Timeout waiting for DownloadSuccess event") + + +def get_latest_pre_release_tag(repo_owner, repo_name): + # URL for GitHub API to fetch all releases of the repository + url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/releases" + + # Headers to specify the API version + headers = { + "Accept": "application/vnd.github.v3+json" + } + + # Send a GET request to the GitHub API + response = requests.get(url, headers=headers) + + # Check the response status; raise an error if the request failed + if response.status_code != 200: + raise Exception(f"Failed to fetch releases: {response.status_code}, {response.text}") + + # Parse the JSON response into a list of releases + releases = response.json() + + # Filter the releases to include only pre-releases + pre_releases = [release for release in releases if release.get("prerelease")] + + # If no pre-releases are found, raise an exception + if not pre_releases: + raise Exception("No pre-releases found") + + # Sort the pre-releases by creation date, newest first + pre_releases.sort(key=lambda x: x["created_at"], reverse=True) + + # Return the tag name of the latest pre-release + return pre_releases[0]["tag_name"] \ No newline at end of file diff --git a/engine/main.cc b/engine/main.cc index 4b95785bb..b39c4c6e2 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -9,7 +9,10 @@ #include "controllers/process_manager.h" #include "controllers/server.h" #include "cortex-common/cortexpythoni.h" +#include "database/database.h" +#include "migrations/migration_manager.h" #include "services/config_service.h" +#include "services/file_watcher_service.h" #include "services/model_service.h" #include "utils/archive_utils.h" #include "utils/cortex_utils.h" @@ -19,6 +22,7 @@ #include "utils/file_manager_utils.h" #include "utils/logging_utils.h" #include "utils/system_info_utils.h" +#include "utils/widechar_conv.h" #if defined(__APPLE__) && defined(__MACH__) #include // for dirname() @@ -63,7 +67,11 @@ void RunServer(std::optional port, bool ignore_cout) { } // Create logs/ folder and setup log to file std::filesystem::create_directories( +#if defined(_WIN32) + std::filesystem::u8path(config.logFolderPath) / +#else std::filesystem::path(config.logFolderPath) / +#endif std::filesystem::path(cortex_utils::logs_folder)); static trantor::FileLogger asyncFileLogger; asyncFileLogger.setFileName( @@ -107,6 +115,7 @@ void RunServer(std::optional port, bool ignore_cout) { auto event_queue_ptr = std::make_shared(); cortex::event::EventProcessor event_processor(event_queue_ptr); + auto model_dir_path = file_manager_utils::GetModelsContainerPath(); auto config_service = std::make_shared(); auto download_service = std::make_shared(event_queue_ptr, config_service); @@ -116,6 +125,10 @@ void RunServer(std::optional port, bool ignore_cout) { auto model_service = std::make_shared( download_service, inference_svc, engine_service); + auto file_watcher_srv = std::make_shared( + model_dir_path.string(), model_service); + 
file_watcher_srv->start(); + // initialize custom controllers auto engine_ctl = std::make_shared(engine_service); auto model_ctl = std::make_shared(model_service, engine_service); @@ -142,6 +155,8 @@ void RunServer(std::optional port, bool ignore_cout) { LOG_INFO << "Please load your model"; #ifndef _WIN32 drogon::app().enableReusePort(); +#else + drogon::app().enableDateHeader(false); #endif drogon::app().addListener(config.apiServerHost, std::stoi(config.apiServerPort)); @@ -190,7 +205,11 @@ void RunServer(std::optional port, bool ignore_cout) { } } +#if defined(_WIN32) +int wmain(int argc, wchar_t* argv[]) { +#else int main(int argc, char* argv[]) { +#endif // Stop the program if the system is not supported auto system_info = system_info_utils::GetSystemInfo(); if (system_info->arch == system_info_utils::kUnsupported || @@ -205,6 +224,28 @@ int main(int argc, char* argv[]) { std::optional server_port; bool ignore_cout_log = false; +#if defined(_WIN32) + for (int i = 0; i < argc; i++) { + std::wstring command = argv[i]; + if (command == L"--config_file_path") { + std::wstring v = argv[i + 1]; + file_manager_utils::cortex_config_file_path = + cortex::wc::WstringToUtf8(v); + } else if (command == L"--data_folder_path") { + std::wstring v = argv[i + 1]; + file_manager_utils::cortex_data_folder_path = + cortex::wc::WstringToUtf8(v); + } else if (command == L"--port") { + server_port = std::stoi(argv[i + 1]); + } else if (command == L"--ignore_cout") { + ignore_cout_log = true; + } else if (command == L"--loglevel") { + std::wstring v = argv[i + 1]; + std::string log_level = cortex::wc::WstringToUtf8(v); + logging_utils_helper::SetLogLevel(log_level, ignore_cout_log); + } + } +#else for (int i = 0; i < argc; i++) { if (strcmp(argv[i], "--config_file_path") == 0) { file_manager_utils::cortex_config_file_path = argv[i + 1]; @@ -219,6 +260,7 @@ int main(int argc, char* argv[]) { logging_utils_helper::SetLogLevel(log_level, ignore_cout_log); } } +#endif { auto result = file_manager_utils::CreateConfigFileIfNotExist(); @@ -244,6 +286,15 @@ int main(int argc, char* argv[]) { } } + // check if migration is needed + if (auto res = cortex::migr::MigrationManager( + cortex::db::Database::GetInstance().db()) + .Migrate(); + res.has_error()) { + CLI_LOG("Error: " << res.error()); + return 1; + } + // Delete temporary file if it exists auto temp = file_manager_utils::GetExecutableFolderContainerPath() / "cortex_temp"; @@ -255,26 +306,26 @@ int main(int argc, char* argv[]) { } } - // Check if this process is for python execution - if (argc > 1) { - if (strcmp(argv[1], "--run_python_file") == 0) { - std::string py_home_path = (argc > 3) ? argv[3] : ""; - std::unique_ptr dl; - try { - std::string abs_path = - cortex_utils::GetCurrentPath() + kPythonRuntimeLibPath; - dl = std::make_unique(abs_path, "engine"); - } catch (const cortex_cpp::dylib::load_error& e) { - LOG_ERROR << "Could not load engine: " << e.what(); - return 1; - } + // // Check if this process is for python execution + // if (argc > 1) { + // if (strcmp(argv[1], "--run_python_file") == 0) { + // std::string py_home_path = (argc > 3) ? 
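`main.cc` now refuses to start the server when the schema migration fails, exiting with status 1. In outline, the gate looks like the following Python sketch; the database path is a stand-in and the per-version step runner is elided:

```python
import sqlite3
import sys

SCHEMA_VERSION = 0  # mirrors engine/migrations/schema_version.h

def migrate(con: sqlite3.Connection) -> None:
    """Bring the database to SCHEMA_VERSION; raise if any step fails."""
    con.execute(
        "CREATE TABLE IF NOT EXISTS schema_version (version INTEGER PRIMARY KEY)")
    row = con.execute("SELECT version FROM schema_version LIMIT 1").fetchone()
    current = row[0] if row else -1
    for version in range(current + 1, SCHEMA_VERSION + 1):
        pass  # apply the per-version folder and DB migration steps here

def main() -> int:
    con = sqlite3.connect("cortex.db")  # stand-in path
    try:
        migrate(con)
    except Exception as err:
        print(f"Error: {err}")
        return 1  # do not start the server on a failed migration
    finally:
        con.close()
    return 0

if __name__ == "__main__":
    sys.exit(main())
```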
argv[3] : ""; + // std::unique_ptr dl; + // try { + // std::string abs_path = + // cortex_utils::GetCurrentPath() + kPythonRuntimeLibPath; + // dl = std::make_unique(abs_path, "engine"); + // } catch (const cortex_cpp::dylib::load_error& e) { + // LOG_ERROR << "Could not load engine: " << e.what(); + // return 1; + // } - auto func = dl->get_function("get_engine"); - auto e = func(); - e->ExecutePythonFile(argv[0], argv[2], py_home_path); - return 0; - } - } + // auto func = dl->get_function("get_engine"); + // auto e = func(); + // e->ExecutePythonFile(argv[0], argv[2], py_home_path); + // return 0; + // } + // } RunServer(server_port, ignore_cout_log); return 0; diff --git a/engine/migrations/migration_helper.cc b/engine/migrations/migration_helper.cc new file mode 100644 index 000000000..f2b39d77e --- /dev/null +++ b/engine/migrations/migration_helper.cc @@ -0,0 +1,72 @@ +#include "migration_helper.h" + +namespace cortex::migr { +cpp::result MigrationHelper::BackupDatabase( + const std::filesystem::path& src_db_path, + const std::string& backup_db_path) { + try { + SQLite::Database src_db(src_db_path, SQLite::OPEN_READONLY); + sqlite3* backup_db; + + if (sqlite3_open16(backup_db_path.c_str(), &backup_db) != SQLITE_OK) { + throw std::runtime_error("Failed to open backup database"); + } + + sqlite3_backup* backup = + sqlite3_backup_init(backup_db, "main", src_db.getHandle(), "main"); + if (!backup) { + sqlite3_close(backup_db); + throw std::runtime_error("Failed to initialize backup"); + } + + if (sqlite3_backup_step(backup, -1) != SQLITE_DONE) { + sqlite3_backup_finish(backup); + sqlite3_close(backup_db); + throw std::runtime_error("Failed to perform backup"); + } + + sqlite3_backup_finish(backup); + sqlite3_close(backup_db); + // CTL_INF("Backup completed successfully."); + return true; + } catch (const std::exception& e) { + CTL_WRN("Error during backup: " << e.what()); + return cpp::fail(e.what()); + } +} + +cpp::result MigrationHelper::RestoreDatabase( + const std::string& backup_db_path, + const std::filesystem::path& target_db_path) { + try { + SQLite::Database target_db(target_db_path, + SQLite::OPEN_READWRITE | SQLite::OPEN_CREATE); + sqlite3* backup_db; + + if (sqlite3_open16(backup_db_path.c_str(), &backup_db) != SQLITE_OK) { + throw std::runtime_error("Failed to open backup database"); + } + + sqlite3_backup* backup = + sqlite3_backup_init(target_db.getHandle(), "main", backup_db, "main"); + if (!backup) { + sqlite3_close(backup_db); + throw std::runtime_error("Failed to initialize restore"); + } + + if (sqlite3_backup_step(backup, -1) != SQLITE_DONE) { + sqlite3_backup_finish(backup); + sqlite3_close(backup_db); + throw std::runtime_error("Failed to perform restore"); + } + + sqlite3_backup_finish(backup); + sqlite3_close(backup_db); + // CTL_INF("Restore completed successfully."); + return true; + } catch (const std::exception& e) { + CTL_WRN("Error during restore: " << e.what()); + return cpp::fail(e.what()); + } +} +} // namespace cortex::migr \ No newline at end of file diff --git a/engine/migrations/migration_helper.h b/engine/migrations/migration_helper.h new file mode 100644 index 000000000..cdf7b8f55 --- /dev/null +++ b/engine/migrations/migration_helper.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include +#include +#include "utils/logging_utils.h" +#include "utils/result.hpp" + +namespace cortex::migr { +class MigrationHelper { + public: + cpp::result BackupDatabase( + const std::filesystem::path& src_db_path, + const std::string& backup_db_path); + + cpp::result 
RestoreDatabase( + const std::string& backup_db_path, + const std::filesystem::path& target_db_path); +}; +} // namespace cortex::migr diff --git a/engine/migrations/migration_manager.cc b/engine/migrations/migration_manager.cc new file mode 100644 index 000000000..f4b4f8046 --- /dev/null +++ b/engine/migrations/migration_manager.cc @@ -0,0 +1,224 @@ +#include "migration_manager.h" +#include +#include "assert.h" +#include "schema_version.h" +#include "utils/file_manager_utils.h" +#include "utils/scope_exit.h" + +namespace cortex::migr { + +namespace { +int GetSchemaVersion(SQLite::Database& db) { + int version = -1; // Default version if not set + + try { + SQLite::Statement query(db, "SELECT version FROM schema_version LIMIT 1;"); + + // Execute the query and get the result + if (query.executeStep()) { + version = + query.getColumn(0).getInt(); // Get the version from the first column + } + } catch (const std::exception& e) { + // CTL_WRN("SQLite error: " << e.what()); + } + + return version; +} + +constexpr const auto kCortexDb = "cortex.db"; +constexpr const auto kCortexDbBackup = "cortex_backup.db"; +} // namespace + +cpp::result MigrationManager::Migrate() { + namespace fmu = file_manager_utils; + int last_schema_version = GetSchemaVersion(db_); + int target_schema_version = SCHEMA_VERSION; + if (last_schema_version == target_schema_version) + return true; + // Back up all data before migrating + if (std::filesystem::exists(fmu::GetCortexDataPath() / kCortexDb)) { + auto src_db_path = (fmu::GetCortexDataPath() / kCortexDb); + auto backup_db_path = (fmu::GetCortexDataPath() / kCortexDbBackup); + if (auto res = mgr_helper_.BackupDatabase(src_db_path, backup_db_path.string()); + res.has_error()) { + CTL_INF("Error: backup database failed!"); + return res; + } + } + + cortex::utils::ScopeExit se([]() { + auto cortex_tmp = fmu::GetCortexDataPath() / kCortexDbBackup; + if (std::filesystem::exists(cortex_tmp)) { + try { + auto n = std::filesystem::remove_all(cortex_tmp); + // CTL_INF("Deleted " << n << " files or directories"); + } catch (const std::exception& e) { + CTL_WRN(e.what()); + } + } + }); + + auto restore_db = [this]() -> cpp::result { + auto src_db_path = (fmu::GetCortexDataPath() / kCortexDb); + auto backup_db_path = (fmu::GetCortexDataPath() / kCortexDbBackup); + return mgr_helper_.BackupDatabase(src_db_path, backup_db_path.string()); + }; + + // Backup folder structure + // Update logic if the folder structure changes + + // Migrate folder structure + if (last_schema_version <= target_schema_version) { + if (auto res = + UpFolderStructure(last_schema_version, target_schema_version); + res.has_error()) { + // Restore + return res; + } + } else { + if (auto res = + DownFolderStructure(last_schema_version, target_schema_version); + res.has_error()) { + // Restore + return res; + } + } + + // Update database on folder structure changes + // Update logic if the folder structure changes + + // Migrate database + if (last_schema_version < target_schema_version) { + if (auto res = UpDB(last_schema_version, target_schema_version); + res.has_error()) { + auto r = restore_db(); + return res; + } + } else { + if (auto res = DownDB(last_schema_version, target_schema_version); + res.has_error()) { + auto r = restore_db(); + return res; + } + } + return true; +} + +cpp::result MigrationManager::UpFolderStructure(int current, + int target) { + assert(current < target); + for (int v = current + 1; v <= target; v++) { + if (auto res = DoUpFolderStructure(v /*version*/); res.has_error()) { + 
return res; + } + } + return true; +} + +cpp::result MigrationManager::DownFolderStructure( + int current, int target) { + assert(current > target); + for (int v = current; v > target; v--) { + if (auto res = DoDownFolderStructure(v /*version*/); res.has_error()) { + return res; + } + } + return true; +} + +cpp::result MigrationManager::DoUpFolderStructure( + int version) { + switch (version) { + case 0: + return v0::MigrateFolderStructureUp(); + break; + + default: + return true; + } +} +cpp::result MigrationManager::DoDownFolderStructure( + int version) { + switch (version) { + case 0: + return v0::MigrateFolderStructureDown(); + break; + + default: + return true; + } +} + +cpp::result MigrationManager::UpDB(int current, int target) { + assert(current < target); + for (int v = current + 1; v <= target; v++) { + if (auto res = DoUpDB(v /*version*/); res.has_error()) { + return res; + } + } + // Save database + return UpdateSchemaVersion(current, target); +} +cpp::result MigrationManager::DownDB(int current, + int target) { + assert(current > target); + for (int v = current; v > target; v--) { + if (auto res = DoDownDB(v /*version*/); res.has_error()) { + return res; + } + } + // Save database + return UpdateSchemaVersion(current, target); +} + +cpp::result MigrationManager::DoUpDB(int version) { + switch (version) { + case 0: + return v0::MigrateDBUp(db_); + break; + + default: + return true; + } +} + +cpp::result MigrationManager::DoDownDB(int version) { + switch (version) { + case 0: + return v0::MigrateDBDown(db_); + break; + + default: + return true; + } +} + +cpp::result MigrationManager::UpdateSchemaVersion( + int old_version, int new_version) { + if (old_version == new_version) + return true; + try { + db_.exec("BEGIN TRANSACTION;"); + + SQLite::Statement insert(db_, + "INSERT INTO schema_version (version) VALUES (?)"); + insert.bind(1, new_version); + insert.exec(); + + if (old_version != -1) { + SQLite::Statement del(db_, + "DELETE FROM schema_version WHERE version = ?"); + del.bind(1, old_version); + del.exec(); + } + + db_.exec("COMMIT;"); + // CTL_INF("Inserted: " << version); + return true; + } catch (const std::exception& e) { + CTL_WRN(e.what()); + return cpp::fail(e.what()); + } +} +} // namespace cortex::migr \ No newline at end of file diff --git a/engine/migrations/migration_manager.h b/engine/migrations/migration_manager.h new file mode 100644 index 000000000..b05a76c26 --- /dev/null +++ b/engine/migrations/migration_manager.h @@ -0,0 +1,31 @@ +#pragma once +#include "migration_helper.h" +#include "v0/migration.h" + +namespace cortex::migr { +class MigrationManager { + public: + explicit MigrationManager(SQLite::Database& db) : db_(db) {} + cpp::result Migrate(); + + private: + cpp::result UpFolderStructure(int current, int target); + cpp::result DownFolderStructure(int current, int target); + + cpp::result DoUpFolderStructure(int version); + cpp::result DoDownFolderStructure(int version); + + cpp::result UpDB(int current, int target); + cpp::result DownDB(int current, int target); + + cpp::result DoUpDB(int version); + cpp::result DoDownDB(int version); + + cpp::result UpdateSchemaVersion(int old_version, + int new_version); + + private: + MigrationHelper mgr_helper_; + SQLite::Database& db_; +}; +} // namespace cortex::migr \ No newline at end of file diff --git a/engine/migrations/schema_version.h b/engine/migrations/schema_version.h new file mode 100644 index 000000000..7cfccf27a --- /dev/null +++ b/engine/migrations/schema_version.h @@ -0,0 +1,4 @@ +#pragma once + 
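`UpdateSchemaVersion` records the new version and removes the old row inside a single transaction, so a crash cannot leave the table half-updated. The equivalent in sketch form:

```python
import sqlite3

def update_schema_version(con: sqlite3.Connection, old: int, new: int) -> None:
    """Swap the recorded schema version atomically (old == -1 means none was set)."""
    if old == new:
        return
    with con:  # BEGIN ... COMMIT; rolls back automatically on an exception
        con.execute("INSERT INTO schema_version (version) VALUES (?)", (new,))
        if old != -1:
            con.execute("DELETE FROM schema_version WHERE version = ?", (old,))
```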
+//Track the current schema version +#define SCHEMA_VERSION 0 \ No newline at end of file diff --git a/engine/migrations/v0/migration.h b/engine/migrations/v0/migration.h new file mode 100644 index 000000000..9d44435c5 --- /dev/null +++ b/engine/migrations/v0/migration.h @@ -0,0 +1,88 @@ +#pragma once +#include +#include +#include +#include "utils/file_manager_utils.h" +#include "utils/logging_utils.h" +#include "utils/result.hpp" + +namespace cortex::migr::v0 { +// Data folder +namespace fmu = file_manager_utils; + +// cortexcpp +// |__ models +// | |__ cortex.so +// | |__ tinyllama +// | |__ gguf +// |__ engines +// | |__ cortex.llamacpp +// | |__ deps +// | |__ windows-amd64-avx +// |__ logs +// +inline cpp::result MigrateFolderStructureUp() { + if (!std::filesystem::exists(fmu::GetCortexDataPath() / "models")) { + std::filesystem::create_directory(fmu::GetCortexDataPath() / "models"); + } + + if (!std::filesystem::exists(fmu::GetCortexDataPath() / "engines")) { + std::filesystem::create_directory(fmu::GetCortexDataPath() / "engines"); + } + + if (!std::filesystem::exists(fmu::GetCortexDataPath() / "logs")) { + std::filesystem::create_directory(fmu::GetCortexDataPath() / "logs"); + } + + return true; +} + +inline cpp::result MigrateFolderStructureDown() { + // CTL_INF("Folder structure already up to date!"); + return true; +} + +// Database +inline cpp::result MigrateDBUp(SQLite::Database& db) { + try { + db.exec( + "CREATE TABLE IF NOT EXISTS schema_version ( version INTEGER PRIMARY " + "KEY);"); + + db.exec( + "CREATE TABLE IF NOT EXISTS models (" + "model_id TEXT PRIMARY KEY," + "author_repo_id TEXT," + "branch_name TEXT," + "path_to_model_yaml TEXT," + "model_alias TEXT);"); + + db.exec( + "CREATE TABLE IF NOT EXISTS hardware (" + "uuid TEXT PRIMARY KEY, " + "type TEXT NOT NULL, " + "hardware_id INTEGER NOT NULL, " + "software_id INTEGER NOT NULL, " + "activated INTEGER NOT NULL CHECK (activated IN (0, 1)));"); + + // CTL_INF("Database migration up completed successfully."); + return true; + } catch (const std::exception& e) { + CTL_WRN("Migration up failed: " << e.what()); + return cpp::fail(e.what()); + } +}; + +inline cpp::result MigrateDBDown(SQLite::Database& db) { + try { + db.exec("DROP TABLE IF EXISTS hardware;"); + db.exec("DROP TABLE IF EXISTS models;"); + // CTL_INF("Migration down completed successfully."); + return true; + } catch (const std::exception& e) { + CTL_WRN("Migration down failed: " << e.what()); + return cpp::fail(e.what()); + } +} + +}; // namespace cortex::migr::v0 diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc index 09c4d1a75..d855c8f61 100644 --- a/engine/services/download_service.cc +++ b/engine/services/download_service.cc @@ -544,10 +544,16 @@ void DownloadService::EmitTaskCompleted(const std::string& task_id) { } void DownloadService::ExecuteCallback(const DownloadTask& task) { - std::lock_guard lock(callbacks_mutex_); - auto it = callbacks_.find(task.id); - if (it != callbacks_.end()) { - it->second(task); - callbacks_.erase(it); + std::lock_guard active_task_lock(active_tasks_mutex_); + if (auto it = active_tasks_.find(task.id); it != active_tasks_.end()) { + for (auto& item : it->second->items) { + item.downloadedBytes = item.bytes; + } + std::lock_guard lock(callbacks_mutex_); + auto callback = callbacks_.find(task.id); + if (callback != callbacks_.end()) { + callback->second(*it->second); + callbacks_.erase(callback); + } } } diff --git a/engine/services/engine_service.cc 
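The reworked `DownloadService::ExecuteCallback` above looks the task up under `active_tasks_mutex_`, marks every item fully downloaded, and only then pops and runs the one-shot callback, so the callback observes final byte counts. A thread-safety sketch of that shape:

```python
import threading

class DownloadCallbacks:
    """Sketch of the ExecuteCallback flow: finalize the live task's byte counts
    under the task lock, then pop the one-shot callback and invoke it."""

    def __init__(self):
        self._active_tasks = {}   # task_id -> {"items": [{"bytes": int, ...}, ...]}
        self._callbacks = {}      # task_id -> callable(task)
        self._tasks_lock = threading.Lock()
        self._callbacks_lock = threading.Lock()

    def execute_callback(self, task_id: str) -> None:
        with self._tasks_lock:
            task = self._active_tasks.get(task_id)
            if task is None:
                return  # unknown or already-finished task
            for item in task["items"]:
                item["downloadedBytes"] = item["bytes"]  # completed == full size
            with self._callbacks_lock:
                callback = self._callbacks.pop(task_id, None)
            if callback is not None:
                callback(task)
```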
b/engine/services/engine_service.cc index 8d8a4a65c..4eebff669 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -4,6 +4,7 @@ #include #include "algorithm" #include "utils/archive_utils.h" +#include "utils/cortex_utils.h" #include "utils/engine_constants.h" #include "utils/engine_matcher_utils.h" #include "utils/file_manager_utils.h" @@ -69,12 +70,12 @@ std::string GetEnginePath(std::string_view e) { }; } // namespace -cpp::result EngineService::InstallEngineAsyncV2( +cpp::result EngineService::InstallEngineAsync( const std::string& engine, const std::string& version, const std::optional variant_name) { auto ne = NormalizeEngine(engine); - CTL_INF("InstallEngineAsyncV2: " << ne << ", " << version << ", " - << variant_name.value_or("")); + CTL_INF("InstallEngineAsync: " << ne << ", " << version << ", " + << variant_name.value_or("")); auto os = hw_inf_.sys_inf->os; if (os == kMacOs && (ne == kOnnxRepo || ne == kTrtLlmRepo)) { return cpp::fail("Engine " + ne + " is not supported on macOS"); @@ -84,7 +85,7 @@ cpp::result EngineService::InstallEngineAsyncV2( return cpp::fail("Engine " + ne + " is not supported on Linux"); } - auto result = DownloadEngineV2(ne, version, variant_name); + auto result = DownloadEngine(ne, version, variant_name); if (result.has_error()) { return cpp::fail(result.error()); } @@ -95,25 +96,6 @@ cpp::result EngineService::InstallEngineAsyncV2( return {}; } -cpp::result EngineService::InstallEngineAsync( - const std::string& engine, const std::string& version, - const std::string& src) { - // Although this function is called async, only download tasks are performed async - auto ne = NormalizeEngine(engine); - if (!src.empty()) { - auto res = UnzipEngine(ne, version, src); - // If has error or engine is installed successfully - if (res.has_error() || res.value()) { - return res; - } - } - auto result = DownloadEngine(ne, version, true /*async*/); - if (result.has_error()) { - return result; - } - return DownloadCuda(ne, true /*async*/); -} - cpp::result EngineService::UnzipEngine( const std::string& engine, const std::string& version, const std::string& path) { @@ -242,7 +224,7 @@ cpp::result EngineService::UninstallEngineVariant( } } -cpp::result EngineService::DownloadEngineV2( +cpp::result EngineService::DownloadEngine( const std::string& engine, const std::string& version, const std::optional variant_name) { auto normalized_version = version == "latest" @@ -377,101 +359,6 @@ cpp::result EngineService::DownloadEngineV2( return {}; } -cpp::result EngineService::DownloadEngine( - const std::string& engine, const std::string& version, bool async) { - auto res = GetEngineVariants(engine, version); - if (res.has_error()) { - return cpp::fail("Failed to fetch engine releases: " + res.error()); - } - - if (res.value().empty()) { - return cpp::fail("No release found for " + version); - } - - auto os_arch{hw_inf_.sys_inf->os + "-" + hw_inf_.sys_inf->arch}; - - std::vector variants; - for (const auto& asset : res.value()) { - variants.push_back(asset.name); - } - - CTL_INF("engine: " << engine); - CTL_INF("CUDA version: " << hw_inf_.cuda_driver_version); - auto matched_variant = GetMatchedVariant(engine, variants); - CTL_INF("Matched variant: " << matched_variant); - if (matched_variant.empty()) { - CTL_ERR("No variant found for " << os_arch); - return cpp::fail("No variant found for " + os_arch); - } - - for (const auto& asset : res.value()) { - if (asset.name == matched_variant) { - CTL_INF("Download url: " << 
asset.browser_download_url); - - std::filesystem::path engine_folder_path = - file_manager_utils::GetContainerFolderPath( - file_manager_utils::DownloadTypeToString(DownloadType::Engine)) / - engine; - - if (!std::filesystem::exists(engine_folder_path)) { - CTL_INF("Creating " << engine_folder_path.string()); - std::filesystem::create_directories(engine_folder_path); - } - if (IsEngineLoaded(engine)) { - CTL_INF("Engine " << engine << " is already loaded, unloading it"); - auto unload_res = UnloadEngine(engine); - if (unload_res.has_error()) { - CTL_INF("Failed to unload engine: " << unload_res.error()); - return cpp::fail(unload_res.error()); - } else { - CTL_INF("Engine " << engine << " unloaded successfully"); - } - } - CTL_INF("Engine folder path: " << engine_folder_path.string() << "\n"); - auto local_path = engine_folder_path / asset.name; - auto downloadTask{ - DownloadTask{.id = engine, - .type = DownloadType::Engine, - .items = {DownloadItem{ - .id = engine, - .downloadUrl = asset.browser_download_url, - .localPath = local_path, - }}}}; - - auto on_finished = [](const DownloadTask& finishedTask) { - // try to unzip the downloaded file - CTL_INF( - "Engine zip path: " << finishedTask.items[0].localPath.string()); - - std::filesystem::path extract_path = - finishedTask.items[0].localPath.parent_path().parent_path(); - - archive_utils::ExtractArchive(finishedTask.items[0].localPath.string(), - extract_path.string()); - - // remove the downloaded file - try { - std::filesystem::remove(finishedTask.items[0].localPath); - } catch (const std::exception& e) { - CTL_WRN("Could not delete file: " << e.what()); - } - CTL_INF("Finished!"); - }; - - if (async) { - auto res = download_service_->AddTask(downloadTask, on_finished); - if (res.has_error()) { - return cpp::fail(res.error()); - } - return true; - } else { - return download_service_->AddDownloadTask(downloadTask, on_finished); - } - } - } - return true; -} - cpp::result EngineService::DownloadCuda( const std::string& engine, bool async) { if (hw_inf_.sys_inf->os == "mac" || engine == kOnnxRepo || @@ -778,14 +665,17 @@ cpp::result EngineService::LoadEngine( CTL_INF("Selected engine variant: " << json_helper::DumpJsonString(selected_engine_variant->ToJson())); - +#if defined(_WIN32) + auto user_defined_engine_path = _wgetenv(L"ENGINE_PATH"); +#else auto user_defined_engine_path = getenv("ENGINE_PATH"); +#endif + CTL_DBG("user defined engine path: " << user_defined_engine_path); const std::filesystem::path engine_dir_path = [&] { if (user_defined_engine_path != nullptr) { - return std::filesystem::path(user_defined_engine_path + - GetEnginePath(ne)) / - selected_engine_variant->variant / + return std::filesystem::path(user_defined_engine_path) / + GetEnginePath(ne) / selected_engine_variant->variant / selected_engine_variant->version; } else { return file_manager_utils::GetEnginesContainerPath() / ne / @@ -815,9 +705,9 @@ cpp::result EngineService::LoadEngine( // Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful) // 3. 
Add dll directory if met other conditions - auto add_dll = [this](const std::string& e_type, const std::string& p) { - auto ws = std::wstring(p.begin(), p.end()); - if (auto cookie = AddDllDirectory(ws.c_str()); cookie != 0) { + auto add_dll = [this](const std::string& e_type, + const std::filesystem::path& p) { + if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) { CTL_DBG("Added dll directory: " << p); engines_[e_type].cookie = cookie; } else { @@ -834,7 +724,11 @@ cpp::result EngineService::LoadEngine( } }; +#if defined(_WIN32) + if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH")); +#else if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); +#endif should_use_dll_search_path) { if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo && should_use_dll_search_path) { @@ -850,11 +744,11 @@ cpp::result EngineService::LoadEngine( CTL_DBG("Removed cuda dll directory: " << kLlamaRepo); } - add_dll(ne, engine_dir_path.string()); + add_dll(ne, engine_dir_path); } else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) { // Do nothing } else { - add_dll(ne, engine_dir_path.string()); + add_dll(ne, engine_dir_path); } } #endif @@ -1032,8 +926,8 @@ cpp::result EngineService::UpdateEngine( << default_variant->variant << " is not up-to-date! Current: " << default_variant->version << ", latest: " << latest_version->name); - auto res = InstallEngineAsyncV2(engine, latest_version->tag_name, - default_variant->variant); + auto res = InstallEngineAsync(engine, latest_version->tag_name, + default_variant->variant); return EngineUpdateResult{.engine = engine, .variant = default_variant->variant, diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index b339fd7df..dee8a530b 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -4,6 +4,7 @@ #include #include #include +#include "common/engine_servicei.h" #include "cortex-common/EngineI.h" #include "cortex-common/cortexpythoni.h" #include "services/download_service.h" @@ -13,7 +14,6 @@ #include "utils/github_release_utils.h" #include "utils/result.hpp" #include "utils/system_info_utils.h" -#include "common/engine_servicei.h" struct EngineUpdateResult { std::string engine; @@ -37,7 +37,7 @@ struct SystemInfo; using EngineV = std::variant; -class EngineService: public EngineServiceI { +class EngineService : public EngineServiceI { private: using EngineRelease = github_release_utils::GitHubRelease; using EngineVariant = github_release_utils::GitHubAsset; @@ -69,17 +69,13 @@ class EngineService: public EngineServiceI { */ cpp::result IsEngineReady(const std::string& engine) const; - cpp::result InstallEngineAsync( - const std::string& engine, const std::string& version = "latest", - const std::string& src = ""); - /** * Handling install engine variant. * * If no version provided, choose `latest`. * If no variant provided, automatically pick the best variant. 
*/ - cpp::result InstallEngineAsyncV2( + cpp::result InstallEngineAsync( const std::string& engine, const std::string& version, const std::optional variant_name); @@ -125,11 +121,7 @@ class EngineService: public EngineServiceI { const std::string& engine); private: - cpp::result DownloadEngine( - const std::string& engine, const std::string& version = "latest", - bool async = false); - - cpp::result DownloadEngineV2( + cpp::result DownloadEngine( const std::string& engine, const std::string& version = "latest", const std::optional variant_name = std::nullopt); diff --git a/engine/services/file_watcher_service.h b/engine/services/file_watcher_service.h new file mode 100644 index 000000000..d15b98827 --- /dev/null +++ b/engine/services/file_watcher_service.h @@ -0,0 +1,364 @@ +#include +#include +#include +#include +#include "services/model_service.h" +#include "utils/logging_utils.h" + +#ifdef __APPLE__ +#include +#include + +#elif defined(_WIN32) +#include + +#else // Linux +#include +#include +#include +#include +#endif + +class FileWatcherService { + private: +#if defined(_WIN32) + HANDLE dir_handle = INVALID_HANDLE_VALUE; + HANDLE stop_event; +#elif defined(__APPLE__) + FSEventStreamRef event_stream; +#else // Linux + int fd; + int wd; + std::unordered_map watch_descriptors; +#endif + + public: + FileWatcherService(const std::string& path, + std::shared_ptr model_service) + : watch_path_{path}, running_{false}, model_service_{model_service} { + if (!std::filesystem::exists(path)) { + throw std::runtime_error("Path does not exist: " + path); + } +#ifdef _WIN32 + stop_event = CreateEvent(NULL, TRUE, FALSE, NULL); +#endif + CTL_INF("FileWatcherService created: " + path); + } + + ~FileWatcherService() { + CTL_INF("FileWatcherService destructor"); + stop(); + } + + void start() { + if (running_) { + return; + } + + running_ = true; + watch_thread_ = std::thread(&FileWatcherService::WatcherThread, this); + } + + void stop() { + if (!running_) { + return; + } + + running_ = false; + +#ifdef _WIN32 + // Signal the stop event + SetEvent(stop_event); +#elif defined(__APPLE__) + if (event_stream) { + FSEventStreamStop(event_stream); + FSEventStreamInvalidate(event_stream); + } +#else // Linux + // For Linux, closing the fd will interrupt the read() call + CTL_INF("before close fd!"); + if (fd >= 0) { + close(fd); + } +#endif + CTL_INF("before join!"); + // Add timeout to avoid infinite waiting + if (watch_thread_.joinable()) { + watch_thread_.join(); + } + +#ifdef _WIN32 + if (stop_event != NULL) { + CloseHandle(stop_event); + } + if (dir_handle != INVALID_HANDLE_VALUE) { + CloseHandle(dir_handle); + } +#elif defined(__APPLE__) + if (event_stream) { + FSEventStreamRelease(event_stream); + } +#else // Linux + CleanupWatches(); +#endif + CTL_INF("FileWatcherService stopped!"); + } + + private: + std::string watch_path_; + std::atomic running_; + std::thread watch_thread_; + std::shared_ptr model_service_; + +#ifdef __APPLE__ + + static void callback(ConstFSEventStreamRef streamRef, + void* clientCallBackInfo, size_t numEvents, + void* eventPaths, + const FSEventStreamEventFlags eventFlags[], + const FSEventStreamEventId eventIds[]) { + auto** paths = (char**)eventPaths; + auto* watcher = static_cast(clientCallBackInfo); + + for (size_t i = 0; i < numEvents; i++) { + if (eventFlags[i] & (kFSEventStreamEventFlagItemRemoved | + kFSEventStreamEventFlagItemRenamed | + kFSEventStreamEventFlagItemModified)) { + CTL_INF("File removed: " + std::string(paths[i])); + CTL_INF("File event detected: " + 
std::string(paths[i]) + + " flags: " + std::to_string(eventFlags[i])); + watcher->model_service_->ForceIndexingModelList(); + } + } + } + + void WatcherThread() { + CFRunLoopRef runLoop = CFRunLoopGetCurrent(); + + auto path = CFStringCreateWithCString(nullptr, watch_path_.c_str(), + kCFStringEncodingUTF8); + auto path_to_watch = + CFArrayCreate(nullptr, (const void**)&path, 1, &kCFTypeArrayCallBacks); + + FSEventStreamContext context = {0, this, nullptr, nullptr, nullptr}; + + event_stream = FSEventStreamCreate( + nullptr, &FileWatcherService::callback, &context, path_to_watch, + kFSEventStreamEventIdSinceNow, 1, // each second + kFSEventStreamCreateFlagFileEvents | kFSEventStreamCreateFlagNoDefer | + kFSEventStreamCreateFlagWatchRoot); + + if (!event_stream) { + CFRelease(path_to_watch); + CFRelease(path); + throw std::runtime_error("Failed to create FSEvent stream"); + } + + FSEventStreamScheduleWithRunLoop(event_stream, runLoop, + kCFRunLoopDefaultMode); + + if (!FSEventStreamStart(event_stream)) { + FSEventStreamInvalidate(event_stream); + FSEventStreamRelease(event_stream); + CFRelease(path_to_watch); + CFRelease(path); + throw std::runtime_error("Failed to start FSEvent stream"); + } + + while (running_) { + CFRunLoopRunInMode(kCFRunLoopDefaultMode, 0.25, true); + } + + FSEventStreamStop(event_stream); + FSEventStreamInvalidate(event_stream); + FSEventStreamRelease(event_stream); + CFRelease(path_to_watch); + CFRelease(path); + } + +#elif defined(_WIN32) + void WatcherThread() { + dir_handle = + CreateFileA(watch_path_.c_str(), FILE_LIST_DIRECTORY, + FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OVERLAPPED, NULL); + + if (dir_handle == INVALID_HANDLE_VALUE) { + throw std::runtime_error("Failed to open directory"); + } + + char buffer[4096]; + OVERLAPPED overlapped = {0}; + overlapped.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); + DWORD bytesReturned; + HANDLE events[] = {overlapped.hEvent, stop_event}; + while (running_) { + if (!ReadDirectoryChangesW( + dir_handle, buffer, sizeof(buffer), TRUE, + FILE_NOTIFY_CHANGE_FILE_NAME | FILE_NOTIFY_CHANGE_DIR_NAME, + &bytesReturned, &overlapped, NULL)) { + break; + } + + // Wait for either file change event or stop event + DWORD result = WaitForMultipleObjects(2, events, FALSE, INFINITE); + if (result == WAIT_OBJECT_0 + 1) { // stop_event was signaled + break; + } + + if (result != WAIT_OBJECT_0 || + !GetOverlappedResult(dir_handle, &overlapped, &bytesReturned, + FALSE)) { + break; + } + + FILE_NOTIFY_INFORMATION* event = (FILE_NOTIFY_INFORMATION*)buffer; + do { + if (event->Action == FILE_ACTION_REMOVED) { + std::wstring fileName(event->FileName, + event->FileNameLength / sizeof(wchar_t)); + + std::string file_name_str(fileName.begin(), fileName.end()); + model_service_->ForceIndexingModelList(); + } + + if (event->NextEntryOffset == 0) + break; + event = (FILE_NOTIFY_INFORMATION*)((uint8_t*)event + + event->NextEntryOffset); + } while (true); + + ResetEvent(overlapped.hEvent); + } + + CloseHandle(overlapped.hEvent); + CloseHandle(dir_handle); + } + +#else // Linux + + void AddWatch(const std::string& dirPath) { + const int watch_flags = IN_DELETE | IN_DELETE_SELF | IN_CREATE; + wd = inotify_add_watch(fd, dirPath.c_str(), watch_flags); + if (wd < 0) { + throw std::runtime_error("Failed to add watch on " + dirPath + ": " + + std::string(strerror(errno))); + } + watch_descriptors[wd] = dirPath; + + // Add watches for subdirectories + try { + for (const auto& entry : + 
std::filesystem::recursive_directory_iterator(dirPath)) { + if (std::filesystem::is_directory(entry)) { + int subwd = inotify_add_watch(fd, entry.path().c_str(), watch_flags); + if (subwd >= 0) { + watch_descriptors[subwd] = entry.path().string(); + } else { + CTL_ERR("Failed to add watch for subdirectory " + + entry.path().string() + ": " + + std::string(strerror(errno))); + } + } + } + } catch (const std::filesystem::filesystem_error& e) { + CTL_ERR("Error walking directory tree: " + std::string(e.what())); + } + } + + void CleanupWatches() { + CTL_INF("Cleanup Watches"); + for (const auto& [wd, path] : watch_descriptors) { + inotify_rm_watch(fd, wd); + } + watch_descriptors.clear(); + + if (fd >= 0) { + close(fd); + fd = -1; + } + } + + void WatcherThread() { + fd = inotify_init1(IN_NONBLOCK); + if (fd < 0) { + CTL_ERR("Failed to initialize inotify: " + std::string(strerror(errno))); + return; + } + + try { + AddWatch(watch_path_); + } catch (const std::exception& e) { + CTL_ERR("Failed to add watch: " + std::string(e.what())); + close(fd); + return; + } + + const int POLL_TIMEOUT_MS = 1000; // 1 second timeout + char buffer[4096]; + struct pollfd pfd = {.fd = fd, .events = POLLIN, .revents = 0}; + + while (running_) { + // Poll will sleep until either: + // 1. Events are available (POLLIN) + // 2. POLL_TIMEOUT_MS milliseconds have elapsed + // 3. An error occurs + int poll_result = poll(&pfd, 1, POLL_TIMEOUT_MS); + + if (poll_result < 0) { + if (errno == EINTR) { + // System call was interrupted, just retry + continue; + } + CTL_ERR("Poll failed: " + std::string(strerror(errno))); + break; + } + + if (poll_result == 0) { // Timeout - no events + // No need to sleep - poll() already waited + continue; + } + + if (pfd.revents & POLLERR || pfd.revents & POLLNVAL) { + CTL_ERR("Poll error on fd"); + break; + } + + // Read all pending events + while (running_) { + int length = read(fd, buffer, sizeof(buffer)); + if (length < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + // No more events to read + break; + } + CTL_ERR("Read error: " + std::string(strerror(errno))); + break; + } + + if (length == 0) { + break; + } + + // Process events + size_t i = 0; + while (i < static_cast(length)) { + struct inotify_event* event = + reinterpret_cast(&buffer[i]); + + if (event->mask & (IN_DELETE | IN_DELETE_SELF)) { + try { + model_service_->ForceIndexingModelList(); + } catch (const std::exception& e) { + CTL_ERR("Error processing delete event: " + + std::string(e.what())); + } + } + + i += sizeof(struct inotify_event) + event->len; + } + } + } + } +#endif +}; diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 905b17107..16ae234b4 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -7,9 +7,10 @@ #include #endif #include "cli/commands/cortex_upd_cmd.h" -#include "database/hardwares.h" +#include "database/hardware.h" #include "services/engine_service.h" #include "utils/cortex_utils.h" +#include "utils/widechar_conv.h" namespace services { @@ -115,24 +116,33 @@ bool HardwareService::Restart(const std::string& host, int port) { ZeroMemory(&si, sizeof(si)); si.cb = sizeof(si); ZeroMemory(&pi, sizeof(pi)); - std::string params = "--ignore_cout"; - params += " --config_file_path " + get_config_file_path(); - params += " --data_folder_path " + get_data_folder_path(); - params += " --loglevel " + luh::LogLevelStr(luh::global_log_level); - std::string cmds = cortex_utils::GetCurrentPath() + "/" + exe + " " + params; + // 
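`FileWatcherService` hand-rolls three platform backends (FSEvents, `ReadDirectoryChangesW`, inotify + poll) whose shared contract is small: watch the models directory recursively and re-index when something is deleted. The third-party `watchdog` package expresses the same contract and is a useful mental model; assumptions here are that watchdog is installed and `force_indexing` stands in for `ModelService::ForceIndexingModelList`:

```python
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

def force_indexing() -> None:  # stand-in for ModelService::ForceIndexingModelList
    print("re-indexing model list")

class ModelDirHandler(FileSystemEventHandler):
    def on_deleted(self, event):
        force_indexing()  # a model file or folder disappeared

observer = Observer()
observer.schedule(ModelDirHandler(), "models/", recursive=True)
observer.start()
try:
    observer.join()  # block; stop() then join() mirrors FileWatcherService::stop()
except KeyboardInterrupt:
    observer.stop()
    observer.join()
```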
TODO (sang) write a common function for this and server_start_cmd + std::wstring params = L"--ignore_cout"; + params += L" --config_file_path " + + file_manager_utils::GetConfigurationPath().wstring(); + params += L" --data_folder_path " + + file_manager_utils::GetCortexDataPath().wstring(); + params += L" --loglevel " + + cortex::wc::Utf8ToWstring(luh::LogLevelStr(luh::global_log_level)); + std::wstring exe_w = cortex::wc::Utf8ToWstring(exe); + std::wstring current_path_w = + file_manager_utils::GetExecutableFolderContainerPath().wstring(); + std::wstring wcmds = current_path_w + L"/" + exe_w + L" " + params; + std::vector mutable_cmds(wcmds.begin(), wcmds.end()); + mutable_cmds.push_back(L'\0'); // Create child process if (!CreateProcess( NULL, // No module name (use command line) - const_cast( - cmds.c_str()), // Command line (replace with your actual executable) - NULL, // Process handle not inheritable - NULL, // Thread handle not inheritable - TRUE, // Handle inheritance - 0, // No creation flags - NULL, // Use parent's environment block - NULL, // Use parent's starting directory - &si, // Pointer to STARTUPINFO structure - &pi)) // Pointer to PROCESS_INFORMATION structure + mutable_cmds + .data(), // Command line (replace with your actual executable) + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + TRUE, // Handle inheritance + 0, // No creation flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &si, // Pointer to STARTUPINFO structure + &pi)) // Pointer to PROCESS_INFORMATION structure { std::cout << "Could not start server: " << GetLastError() << std::endl; return false; diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 793e8ecb5..1ec1a68cf 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -8,14 +8,15 @@ #include "database/models.h" #include "hardware_service.h" #include "httplib.h" -#include "services/engine_service.h" #include "utils/cli_selection_utils.h" +#include "utils/cortex_utils.h" #include "utils/engine_constants.h" #include "utils/file_manager_utils.h" #include "utils/huggingface_utils.h" #include "utils/logging_utils.h" #include "utils/result.hpp" #include "utils/string_utils.h" +#include "utils/widechar_conv.h" namespace { void ParseGguf(const DownloadItem& ggufDownloadItem, @@ -114,6 +115,40 @@ cpp::result GetDownloadTask( } } // namespace +void ModelService::ForceIndexingModelList() { + CTL_INF("Force indexing model list"); + + cortex::db::Models modellist_handler; + config::YamlHandler yaml_handler; + + auto list_entry = modellist_handler.LoadModelList(); + if (list_entry.has_error()) { + CTL_ERR("Failed to load model list: " << list_entry.error()); + return; + } + + namespace fs = std::filesystem; + namespace fmu = file_manager_utils; + + CTL_DBG("Database model size: " + std::to_string(list_entry.value().size())); + for (const auto& model_entry : list_entry.value()) { + try { + yaml_handler.ModelConfigFromFile( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.path_to_model_yaml)) + .string()); + auto model_config = yaml_handler.GetModelConfig(); + Json::Value obj = model_config.ToJson(); + yaml_handler.Reset(); + } catch (const std::exception& e) { + // remove in db + auto remove_result = + modellist_handler.DeleteModelEntry(model_entry.model); + // silently ignore result + } + } +} + cpp::result ModelService::DownloadModel( const std::string& input) { if (input.empty()) { @@ -425,11 +460,18 @@ 
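`ForceIndexingModelList` walks every database entry, attempts to parse its model YAML, and silently drops rows whose file is gone, which is exactly what the file watcher needs after a delete. A sketch against the `models` table from the v0 migration (the db path and data dir are placeholders; PyYAML assumed):

```python
import sqlite3
from pathlib import Path

import yaml  # third-party: pip install pyyaml

def force_index_models(db_path: str, data_dir: str) -> None:
    """Drop models rows whose YAML no longer exists or no longer parses."""
    con = sqlite3.connect(db_path)
    try:
        rows = con.execute(
            "SELECT model_id, path_to_model_yaml FROM models").fetchall()
        for model_id, yaml_rel_path in rows:
            try:
                with open(Path(data_dir) / yaml_rel_path, encoding="utf-8") as f:
                    yaml.safe_load(f)
            except Exception:
                # stale entry: the file was deleted or corrupted
                con.execute("DELETE FROM models WHERE model_id = ?", (model_id,))
        con.commit()
    finally:
        con.close()
```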
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index 793e8ecb5..1ec1a68cf 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -8,14 +8,15 @@
 #include "database/models.h"
 #include "hardware_service.h"
 #include "httplib.h"
-#include "services/engine_service.h"
 #include "utils/cli_selection_utils.h"
+#include "utils/cortex_utils.h"
 #include "utils/engine_constants.h"
 #include "utils/file_manager_utils.h"
 #include "utils/huggingface_utils.h"
 #include "utils/logging_utils.h"
 #include "utils/result.hpp"
 #include "utils/string_utils.h"
+#include "utils/widechar_conv.h"
 
 namespace {
 void ParseGguf(const DownloadItem& ggufDownloadItem,
@@ -114,6 +115,40 @@ cpp::result<DownloadTask, std::string> GetDownloadTask(
   }
 }
 }  // namespace
 
+void ModelService::ForceIndexingModelList() {
+  CTL_INF("Force indexing model list");
+
+  cortex::db::Models modellist_handler;
+  config::YamlHandler yaml_handler;
+
+  auto list_entry = modellist_handler.LoadModelList();
+  if (list_entry.has_error()) {
+    CTL_ERR("Failed to load model list: " << list_entry.error());
+    return;
+  }
+
+  namespace fs = std::filesystem;
+  namespace fmu = file_manager_utils;
+
+  CTL_DBG("Database model size: " + std::to_string(list_entry.value().size()));
+  for (const auto& model_entry : list_entry.value()) {
+    try {
+      yaml_handler.ModelConfigFromFile(
+          fmu::ToAbsoluteCortexDataPath(
+              fs::path(model_entry.path_to_model_yaml))
+              .string());
+      auto model_config = yaml_handler.GetModelConfig();
+      Json::Value obj = model_config.ToJson();
+      yaml_handler.Reset();
+    } catch (const std::exception& e) {
+      // remove in db
+      auto remove_result =
+          modellist_handler.DeleteModelEntry(model_entry.model);
+      // silently ignore result
+    }
+  }
+}
+
 cpp::result<DownloadTask, std::string> ModelService::DownloadModel(
     const std::string& input) {
   if (input.empty()) {
@@ -425,11 +460,18 @@ ModelService::DownloadModelFromCortexsoAsync(
       return;
     }
     auto url_obj = url_parser::FromUrlString(model_yml_item->downloadUrl);
-    CTL_INF("Adding model to modellist with branch: " << branch);
+    CTL_INF("Adding model to modellist with branch: "
+            << branch << ", path: " << model_yml_item->localPath.string());
     config::YamlHandler yaml_handler;
     yaml_handler.ModelConfigFromFile(model_yml_item->localPath.string());
     auto mc = yaml_handler.GetModelConfig();
     mc.model = unique_model_id;
+
+    uint64_t model_size = 0;
+    for (const auto& item : finishedTask.items) {
+      model_size = model_size + item.bytes.value_or(0);
+    }
+    mc.size = model_size;
     yaml_handler.UpdateModelConfig(mc);
     yaml_handler.WriteYamlFile(model_yml_item->localPath.string());
@@ -627,9 +669,13 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
     json_data = mc.ToJson();
     if (mc.files.size() > 0) {
-      // TODO(sang) support multiple files
+#if defined(_WIN32)
+      json_data["model_path"] = cortex::wc::WstringToUtf8(
+          fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).wstring());
+#else
       json_data["model_path"] =
          fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string();
+#endif
     } else {
       LOG_WARN << "model_path is empty";
       return StartModelResult{.success = false};
diff --git a/engine/services/model_service.h b/engine/services/model_service.h
index 7b6375e54..be450fb0b 100644
--- a/engine/services/model_service.h
+++ b/engine/services/model_service.h
@@ -39,6 +39,8 @@ struct StartModelResult {
 
 class ModelService {
  public:
+  void ForceIndexingModelList();
+
   explicit ModelService(std::shared_ptr<DownloadService> download_service)
       : download_service_{download_service} {};
diff --git a/engine/test/components/test_models_db.cc b/engine/test/components/test_models_db.cc
index ef54fe7e0..8c3ebbe00 100644
--- a/engine/test/components/test_models_db.cc
+++ b/engine/test/components/test_models_db.cc
@@ -13,7 +13,19 @@ class ModelsTestSuite : public ::testing::Test {
         model_list_(db_) {}
   void SetUp() {
     try {
-      db_.exec("DELETE FROM models");
+      db_.exec(
+          "CREATE TABLE IF NOT EXISTS models ("
+          "model_id TEXT PRIMARY KEY,"
+          "author_repo_id TEXT,"
+          "branch_name TEXT,"
+          "path_to_model_yaml TEXT,"
+          "model_alias TEXT);");
+    } catch (const std::exception& e) {}
+  }
+
+  void TearDown() {
+    try {
+      db_.exec("DROP TABLE IF EXISTS models;");
     } catch (const std::exception& e) {}
   }
diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h
index 187e1b4ef..3176339a0 100644
--- a/engine/utils/config_yaml_utils.h
+++ b/engine/utils/config_yaml_utils.h
@@ -86,6 +86,9 @@ class CortexConfigMgr {
     if (!out_file) {
       throw std::runtime_error("Failed to open output file.");
     }
+    // Workaround: write a UTF-8 BOM so the file is saved and re-read as UTF-8
+    const unsigned char utf8_bom[] = {0xEF, 0xBB, 0xBF};
+    out_file.write(reinterpret_cast<const char*>(utf8_bom), sizeof(utf8_bom));
     YAML::Node node;
     node["logFolderPath"] = config.logFolderPath;
     node["logLlamaCppPath"] = config.logLlamaCppPath;
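The three bytes in the config_yaml_utils.h hunk are the UTF-8 encoding of U+FEFF, the byte order mark; prepending them is what lets the YAML config round-trip non-ASCII paths on Windows, where files without a BOM are often read in the ANSI code page. A standalone sketch of the same workaround (the file name "config.yml" is illustrative only):

#include <fstream>

int main() {
  std::ofstream out("config.yml", std::ios::binary);
  // UTF-8 BOM: EF BB BF. Editors and Windows tooling that sniff the BOM
  // will decode the rest of the file as UTF-8.
  const unsigned char utf8_bom[] = {0xEF, 0xBB, 0xBF};
  out.write(reinterpret_cast<const char*>(utf8_bom), sizeof(utf8_bom));
  // Any UTF-8 bytes written after the BOM are now unambiguous.
  out << "dataFolderPath: C:\\Users\\cortex\\cortexcpp\n";
  return 0;
}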
diff --git a/engine/utils/cortex_utils.h b/engine/utils/cortex_utils.h
index 2d250df72..895217250 100644
--- a/engine/utils/cortex_utils.h
+++ b/engine/utils/cortex_utils.h
@@ -3,7 +3,10 @@
 #include
 #include
 #include
+#include <ctime>
+#include <iomanip>
 #include
+#include <sstream>
 #include
 #include
 #include
@@ -15,45 +18,82 @@
 #include
 #endif
 
-#if __APPLE__
+#if defined(__APPLE__)
 #include
 #endif
 
+#if defined(_WIN32)
+#include <windows.h>
+#include
+#include
+#endif
+
 namespace cortex_utils {
 inline std::string logs_folder = "./logs";
 inline std::string logs_base_name = "./logs/cortex.log";
 inline std::string logs_cli_base_name = "./logs/cortex-cli.log";
 
+// example: Mon, 25 Nov 2024 09:57:03 GMT
+inline std::string GetDateRFC1123() {
+  std::time_t now = std::time(nullptr);
+  std::tm* gmt_time = std::gmtime(&now);
+  std::ostringstream oss;
+  oss << std::put_time(gmt_time, "%a, %d %b %Y %H:%M:%S GMT");
+  return oss.str();
+}
+
 inline drogon::HttpResponsePtr CreateCortexHttpResponse() {
-  return drogon::HttpResponse::newHttpResponse();
+  auto res = drogon::HttpResponse::newHttpResponse();
+#if defined(_WIN32)
+  res->addHeader("date", GetDateRFC1123());
+#endif
+  return res;
 }
 
+inline drogon::HttpResponsePtr CreateCortexHttpTextAsJsonResponse(
+    const std::string& data) {
+  auto res = drogon::HttpResponse::newHttpResponse();
+  res->setBody(data);
+  res->setContentTypeCode(drogon::CT_APPLICATION_JSON);
+#if defined(_WIN32)
+  res->addHeader("date", GetDateRFC1123());
+#endif
+  return res;
+};
+
 inline drogon::HttpResponsePtr CreateCortexHttpJsonResponse(
     const Json::Value& data) {
-  return drogon::HttpResponse::newHttpJsonResponse(data);
+  auto res = drogon::HttpResponse::newHttpJsonResponse(data);
+#if defined(_WIN32)
+  res->addHeader("date", GetDateRFC1123());
+#endif
+  return res;
 };
 
 inline drogon::HttpResponsePtr CreateCortexStreamResponse(
     const std::function<std::size_t(char*, std::size_t)>& callback,
     const std::string& attachmentFileName = "") {
-  return drogon::HttpResponse::newStreamResponse(
+  auto res = drogon::HttpResponse::newStreamResponse(
       callback, attachmentFileName, drogon::CT_NONE, "text/event-stream");
+#if defined(_WIN32)
+  res->addHeader("date", GetDateRFC1123());
+#endif
+  return res;
 }
+
 #if defined(_WIN32)
 inline std::string GetCurrentPath() {
-  wchar_t path[MAX_PATH];
-  DWORD result = GetModuleFileNameW(NULL, path, MAX_PATH);
+  char path[MAX_PATH];
+  DWORD result = GetModuleFileNameA(NULL, path, MAX_PATH);
   if (result == 0) {
-    std::wcerr << L"Error getting module file name." << std::endl;
+    std::cerr << "Error getting module file name." << std::endl;
     return "";
   }
-  std::wstring::size_type pos = std::wstring(path).find_last_of(L"\\/");
-  auto ws = std::wstring(path).substr(0, pos);
-  std::string res;
-  std::transform(ws.begin(), ws.end(), std::back_inserter(res),
-                 [](wchar_t c) { return (char)c; });
-  return res;
+
+  std::string::size_type pos = std::string(path).find_last_of("\\/");
+  return std::string(path).substr(0, pos);
 }
 #else
 inline std::string GetCurrentPath() {
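GetDateRFC1123() above builds the RFC 1123 date that HTTP expects by running std::put_time with "%a, %d %b %Y %H:%M:%S GMT" over the result of gmtime. The same logic as a free-standing program; note std::gmtime returns a pointer to shared static storage, so gmtime_r (POSIX) or gmtime_s (Windows) would be the thread-safe variants:

#include <ctime>
#include <iomanip>
#include <iostream>
#include <sstream>

int main() {
  std::time_t now = std::time(nullptr);
  std::tm* gmt = std::gmtime(&now);  // UTC broken-down time
  std::ostringstream oss;
  oss << std::put_time(gmt, "%a, %d %b %Y %H:%M:%S GMT");
  std::cout << oss.str() << "\n";  // e.g. "Mon, 25 Nov 2024 09:57:03 GMT"
  return 0;
}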
diff --git a/engine/utils/engine_constants.h b/engine/utils/engine_constants.h
index c63a58ab9..5dab49936 100644
--- a/engine/utils/engine_constants.h
+++ b/engine/utils/engine_constants.h
@@ -9,7 +9,7 @@ constexpr const auto kLlamaRepo = "cortex.llamacpp";
 constexpr const auto kTrtLlmRepo = "cortex.tensorrt-llm";
 constexpr const auto kPythonRuntimeRepo = "cortex.python";
 
-constexpr const auto kLlamaLibPath = "/engines/cortex.llamacpp";
+constexpr const auto kLlamaLibPath = "./engines/cortex.llamacpp";
 constexpr const auto kPythonRuntimeLibPath = "/engines/cortex.python";
 constexpr const auto kOnnxLibPath = "/engines/cortex.onnx";
 constexpr const auto kTensorrtLlmPath = "/engines/cortex.tensorrt-llm";
diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h
index 399afcfa6..72310385c 100644
--- a/engine/utils/file_manager_utils.h
+++ b/engine/utils/file_manager_utils.h
@@ -7,6 +7,7 @@
 #include "utils/config_yaml_utils.h"
 #include "utils/engine_constants.h"
 #include "utils/result.hpp"
+#include "utils/widechar_conv.h"
 
 #if defined(__APPLE__) && defined(__MACH__)
 #include
@@ -14,6 +15,8 @@
 #include
 #elif defined(_WIN32)
 #include <windows.h>
+#include
+#include
 #endif
 
 namespace file_manager_utils {
@@ -55,8 +58,8 @@ inline std::filesystem::path GetExecutableFolderContainerPath() {
     return std::filesystem::current_path();
   }
 #elif defined(_WIN32)
-  char buffer[MAX_PATH];
-  GetModuleFileNameA(NULL, buffer, MAX_PATH);
+  wchar_t buffer[MAX_PATH];
+  GetModuleFileNameW(NULL, buffer, MAX_PATH);
   // CTL_DBG("Executable path: " << buffer);
   return std::filesystem::path{buffer}.parent_path();
 #else
@@ -67,11 +70,11 @@ inline std::filesystem::path GetExecutableFolderContainerPath() {
 
 inline std::filesystem::path GetHomeDirectoryPath() {
 #ifdef _WIN32
-  const char* homeDir = std::getenv("USERPROFILE");
+  const wchar_t* homeDir = _wgetenv(L"USERPROFILE");
   if (!homeDir) {
     // Fallback if USERPROFILE is not set
-    const char* homeDrive = std::getenv("HOMEDRIVE");
-    const char* homePath = std::getenv("HOMEPATH");
+    const wchar_t* homeDrive = _wgetenv(L"HOMEDRIVE");
+    const wchar_t* homePath = _wgetenv(L"HOMEPATH");
     if (homeDrive && homePath) {
       return std::filesystem::path(homeDrive) / std::filesystem::path(homePath);
     } else {
@@ -103,8 +106,12 @@ inline std::filesystem::path GetConfigurationPath() {
   }
 
   if (config_file_path != kDefaultConfigurationPath) {
-    // CTL_INF("Config file path: " + config_file_path);
+// CTL_INF("Config file path: " + config_file_path);
+#if defined(_WIN32)
+    return std::filesystem::u8path(config_file_path);
+#else
     return std::filesystem::path(config_file_path);
+#endif
   }
 
   std::string variant{CORTEX_VARIANT};
@@ -162,11 +169,21 @@ inline config_yaml_utils::CortexConfig GetDefaultConfig() {
           : std::filesystem::path(cortex_data_folder_path);
 
   return config_yaml_utils::CortexConfig{
+#if defined(_WIN32)
+      .logFolderPath =
+          cortex::wc::WstringToUtf8(default_data_folder_path.wstring()),
+#else
      .logFolderPath = default_data_folder_path.string(),
+#endif
      .logLlamaCppPath = kLogsLlamacppBaseName,
      .logTensorrtLLMPath = kLogsTensorrtllmBaseName,
      .logOnnxPath = kLogsOnnxBaseName,
+#if defined(_WIN32)
+      .dataFolderPath =
+          cortex::wc::WstringToUtf8(default_data_folder_path.wstring()),
+#else
      .dataFolderPath = default_data_folder_path.string(),
+#endif
      .maxLogLines = config_yaml_utils::kDefaultMaxLines,
      .apiServerHost = config_yaml_utils::kDefaultHost,
      .apiServerPort = config_yaml_utils::kDefaultPort,
@@ -220,7 +237,11 @@ inline std::filesystem::path GetCortexDataPath() {
   auto config = GetCortexConfig();
   std::filesystem::path data_folder_path;
   if (!config.dataFolderPath.empty()) {
+#if defined(_WIN32)
+    data_folder_path = std::filesystem::u8path(config.dataFolderPath);
+#else
     data_folder_path = std::filesystem::path(config.dataFolderPath);
+#endif
   } else {
     auto home_path = GetHomeDirectoryPath();
     data_folder_path = home_path / kCortexFolderName;
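The file_manager_utils.h changes keep one invariant: paths are stored in the YAML config as UTF-8 and decoded back to native wide paths with std::filesystem::u8path on Windows. A sketch of that round trip under C++17 (u8path is deprecated in C++20 but valid here; the helper names are illustrative, not part of the diff):

#include <filesystem>
#include <string>

// Encode a native path as UTF-8 for storage in a text config file.
// Roughly what the WstringToUtf8(p.wstring()) calls in the diff achieve.
std::string PathToUtf8(const std::filesystem::path& p) {
  return p.u8string();  // UTF-8 bytes regardless of the native encoding
}

// Decode UTF-8 config bytes back into a native (wide, on Windows) path.
std::filesystem::path Utf8ToPath(const std::string& utf8) {
  return std::filesystem::u8path(utf8);
}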
diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h
index 6183c3095..013069699 100644
--- a/engine/utils/system_info_utils.h
+++ b/engine/utils/system_info_utils.h
@@ -87,8 +87,8 @@ inline std::unique_ptr<SystemInfo> GetSystemInfo() {
 inline bool IsNvidiaSmiAvailable() {
 #ifdef _WIN32
   // Check if nvidia-smi.exe exists in the PATH on Windows
-  char buffer[MAX_PATH];
-  if (SearchPath(NULL, "nvidia-smi.exe", NULL, MAX_PATH, buffer, NULL) != 0) {
+  wchar_t buffer[MAX_PATH];
+  if (SearchPathW(NULL, L"nvidia-smi.exe", NULL, MAX_PATH, buffer, NULL) != 0) {
     return true;
   } else {
     return false;
diff --git a/engine/utils/widechar_conv.h b/engine/utils/widechar_conv.h
new file mode 100644
index 000000000..e979be3c1
--- /dev/null
+++ b/engine/utils/widechar_conv.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#if defined(_WIN32)
+#include <stdexcept>
+#include <string>
+#include <windows.h>
+
+namespace cortex::wc {
+
+inline std::string WstringToUtf8(const std::wstring& wstr) {
+  if (wstr.empty()) {
+    return std::string();
+  }
+
+  int size_needed = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), (int)wstr.size(), NULL, 0, NULL, NULL);
+  if (size_needed <= 0) {
+    throw std::runtime_error("WideCharToMultiByte() failed: " + std::to_string(GetLastError()));
+  }
+
+  std::string result(size_needed, 0);
+  int bytes_written = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), (int)wstr.size(), &result[0], size_needed, NULL, NULL);
+  if (bytes_written <= 0) {
+    throw std::runtime_error("WideCharToMultiByte() failed: " + std::to_string(GetLastError()));
+  }
+
+  return result;
+}
+
+inline std::wstring Utf8ToWstring(const std::string& str) {
+  if (str.empty()) {
+    return std::wstring();
+  }
+
+  int size_needed = MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), NULL, 0);
+  if (size_needed <= 0) {
+    throw std::runtime_error("MultiByteToWideChar() failed: " + std::to_string(GetLastError()));
+  }
+
+  std::wstring result(size_needed, 0);
+  int chars_written = MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), &result[0], size_needed);
+  if (chars_written <= 0) {
+    throw std::runtime_error("MultiByteToWideChar() failed: " + std::to_string(GetLastError()));
+  }
+
+  return result;
+}
+
+}  // namespace cortex::wc
+
+#endif
\ No newline at end of file
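Both helpers in widechar_conv.h use the standard two-pass Win32 idiom: one call with a null output buffer to measure the required size, then a second call to fill the allocated buffer. A hypothetical usage example (Windows only; the path is a placeholder):

#include <cassert>
#include "utils/widechar_conv.h"  // path assumed relative to engine/

int main() {
  // Round-trip a non-ASCII path through UTF-8 and back.
  std::wstring wide = L"C:\\Users\\Ñoño\\cortexcpp\\models";
  std::string utf8 = cortex::wc::WstringToUtf8(wide);
  assert(cortex::wc::Utf8ToWstring(utf8) == wide);
  return 0;
}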