From 6518c54e2f0721c6724160ef4e90dcc14496cd3d Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Sat, 15 Feb 2025 23:35:29 +0700
Subject: [PATCH 01/23] test: schema-check engine API e2e via temp CI runner (try 1)

---
 .github/workflows/cortex-cpp-quality-gate.yml | 118 +++++-----
 engine/e2e-test/main-harry.py                 |   6 +
 engine/e2e-test/requirements.txt              |   3 +-
 engine/e2e-test/test_api_engine.py            | 212 +++++++++++++-----
 4 files changed, 220 insertions(+), 119 deletions(-)
 create mode 100644 engine/e2e-test/main-harry.py

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index 8a76e4669..ec8e1eae2 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -1,12 +1,12 @@
 name: CI Quality Gate Cortex CPP
 
 on:
-  pull_request:
-    types: [opened, synchronize, reopened, ready_for_review]
-    paths: ["engine/**", ".github/workflows/cortex-cpp-quality-gate.yml"]
+  # pull_request:
+  #   types: [opened, synchronize, reopened, ready_for_review]
+  #   paths: ["engine/**", ".github/workflows/cortex-cpp-quality-gate.yml"]
   workflow_dispatch:
-  schedule:
-    - cron: '0 22 * * *'
+  # schedule:
+  #   - cron: '0 22 * * *'
 
 env:
   LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
@@ -131,7 +131,7 @@ jobs:
           cp build/cortex build/cortex-beta
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/main.py
+          python e2e-test/main-harry
           rm build/cortex-nightly
           rm build/cortex-beta
         env:
@@ -145,7 +145,7 @@ jobs:
           cp build/cortex.exe build/cortex-beta.exe
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/main.py
+          python e2e-test/main-harry
           rm build/cortex-nightly.exe
           rm build/cortex-beta.exe
         env:
@@ -159,7 +159,7 @@ jobs:
           cp build/cortex build/cortex-beta
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/cortex-llamacpp-e2e-nightly.py
+          python e2e-test/main-harry.py
           rm build/cortex-nightly
           rm build/cortex-beta
         env:
@@ -173,7 +173,7 @@ jobs:
           cp build/cortex.exe build/cortex-beta.exe
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/cortex-llamacpp-e2e-nightly.py
+          python e2e-test/main-harry.py
           rm build/cortex-nightly.exe
           rm build/cortex-beta.exe
         env:
@@ -218,58 +218,58 @@ jobs:
           AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
           AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
 
-  build-docker-and-test:
-    runs-on: ubuntu-24-04-docker
-    steps:
-      - name: Getting the repo
-        uses: actions/checkout@v3
-        with:
-          submodules: 'recursive'
+  # build-docker-and-test:
+  #   runs-on: ubuntu-24-04-docker
+  #   steps:
+  #     - name: Getting the repo
+  #       uses: actions/checkout@v3
+  #       with:
+  #         submodules: 'recursive'
       
-      - name: Run Docker
-        if: github.event_name != 'schedule'
-        run: |
-          docker build \
-            --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \
-            --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \
-            --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \
-            --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \
-            -t menloltd/cortex:test -f docker/Dockerfile.cache .
-          docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
-          sleep 20
+  #     - name: Run Docker
+  #       if: github.event_name != 'schedule'
+  #       run: |
+  #         docker build \
+  #           --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \
+  #           --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \
+  #           --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \
+  #           --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \
+  #           -t menloltd/cortex:test -f docker/Dockerfile.cache .
+  #         docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
+  #         sleep 20
 
-      - name: Run Docker
-        if: github.event_name == 'schedule'
-        run: |
-          latest_prerelease=$(curl -s https://api.github.com/repos/cortexcpp/cortex.cpp/releases | jq -r '.[] | select(.prerelease == true) | .tag_name' | head -n 1)
-          echo "cortex.llamacpp latest release: $latest_prerelease"
-          docker build \
-            --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \
-            --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \
-            --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \
-            --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \
-            --build-arg CORTEX_CPP_VERSION="${latest_prerelease}" \
-            -t menloltd/cortex:test -f docker/Dockerfile.cache .
-          docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
-          sleep 20
+  #     - name: Run Docker
+  #       if: github.event_name == 'schedule'
+  #       run: |
+  #         latest_prerelease=$(curl -s https://api.github.com/repos/cortexcpp/cortex.cpp/releases | jq -r '.[] | select(.prerelease == true) | .tag_name' | head -n 1)
+  #         echo "cortex.llamacpp latest release: $latest_prerelease"
+  #         docker build \
+  #           --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \
+  #           --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \
+  #           --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \
+  #           --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \
+  #           --build-arg CORTEX_CPP_VERSION="${latest_prerelease}" \
+  #           -t menloltd/cortex:test -f docker/Dockerfile.cache .
+  #         docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
+  #         sleep 20
 
-      - name: use python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.10"
+  #     - name: use python
+  #       uses: actions/setup-python@v5
+  #       with:
+  #         python-version: "3.10"
 
-      - name: Run e2e tests
-        run: |
-          cd engine
-          python -m pip install --upgrade pip
-          python -m pip install -r e2e-test/requirements.txt
-          pytest e2e-test/test_api_docker.py
+  #     - name: Run e2e tests
+  #       run: |
+  #         cd engine
+  #         python -m pip install --upgrade pip
+  #         python -m pip install -r e2e-test/requirements.txt
+  #         pytest e2e-test/test_api_docker.py
 
-      - name: Run Docker
-        continue-on-error: true
-        if: always()  
-        run: |
-          docker logs cortex
-          docker stop cortex
-          docker rm cortex
-          echo "y\n" | docker system prune -af
+  #     - name: Run Docker
+  #       continue-on-error: true
+  #       if: always()  
+  #       run: |
+  #         docker logs cortex
+  #         docker stop cortex
+  #         docker rm cortex
+  #         echo "y\n" | docker system prune -af
diff --git a/engine/e2e-test/main-harry.py b/engine/e2e-test/main-harry.py
new file mode 100644
index 000000000..a7dcf0d36
--- /dev/null
+++ b/engine/e2e-test/main-harry.py
@@ -0,0 +1,6 @@
+import pytest
+import sys
+from test_api_engine import TestApiEngine
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__, "-v"]))
diff --git a/engine/e2e-test/requirements.txt b/engine/e2e-test/requirements.txt
index 05b47e0b0..d9c436a11 100644
--- a/engine/e2e-test/requirements.txt
+++ b/engine/e2e-test/requirements.txt
@@ -2,4 +2,5 @@ websockets
 pytest
 pytest-asyncio
 requests
-pyyaml
\ No newline at end of file
+pyyaml
+jsonschema
\ No newline at end of file
diff --git a/engine/e2e-test/test_api_engine.py b/engine/e2e-test/test_api_engine.py
index 57b47b879..d2d50afe6 100644
--- a/engine/e2e-test/test_api_engine.py
+++ b/engine/e2e-test/test_api_engine.py
@@ -6,6 +6,11 @@
     stop_server,
     wait_for_websocket_download_success_event,
 )
+import json
+import jsonschema
+
+# logging.basicConfig(level=logging.INFO, force=True)  # Ensure logs show
+# logger = logging.getLogger(__name__)
 
 class TestApiEngine:
 
@@ -21,80 +26,169 @@ def setup_and_teardown(self):
         # Teardown
         stop_server()
     
+    
+        
+    # # engines install
+    # def test_engines_install_llamacpp_specific_version_and_variant(self):
+    #     data = {"version": "v0.1.35-27.10.24", "variant": "linux-amd64-avx-cuda-11-7"}
+    #     response = requests.post(
+    #         "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+    #     )
+    #     assert response.status_code == 200
+    #     with open("response-install.json", "w") as file:
+    #         json.dump(response.json(), file, indent=4)
+
     # engines get
     def test_engines_get_llamacpp_should_be_successful(self):
-        response = requests.get("http://localhost:3928/engines/llama-cpp")
-        assert response.status_code == 200
-        
-    # engines install
-    def test_engines_install_llamacpp_specific_version_and_variant(self):
+        engine= "llama-cpp"
+        name= "linux-amd64-avx-cuda-11-7"
+        version= "v0.1.35-27.10.24"
+    
+        # data = {"version": version, "variant": name}
         data = {"version": "v0.1.35-27.10.24", "variant": "linux-amd64-avx-cuda-11-7"}
+        post_url = f"http://localhost:3928/v1/engines/{engine}/install"
         response = requests.post(
-            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+            post_url, json=data
         )
         assert response.status_code == 200
+        
+        get_url = f"http://localhost:3928/v1/engines/{engine}"
+        response = requests.get(get_url)
+        count = 0
+        while len(response.json()) == 0:
+            time.sleep(1)
+            response = requests.get(get_url)
+            count += 1
 
-    def test_engines_install_llamacpp_specific_version_and_null_variant(self):
-        data = {"version": "v0.1.35-27.10.24"}
-        response = requests.post(
-            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
-        )
-        assert response.status_code == 200
-    
-    # engines uninstall
-    @pytest.mark.asyncio
-    async def test_engines_install_uninstall_llamacpp_should_be_successful(self):
-        response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install")
+        json_data = response.json()
+        with open("e2e-test/response.json", "w") as file:
+            json.dump(json_data, file, indent=4)
+            # file.write(f"\nCount: {count}\n") 
         assert response.status_code == 200
-        await wait_for_websocket_download_success_event(timeout=None)
-        time.sleep(30)
 
-        response = requests.delete("http://localhost:3928/v1/engines/llama-cpp/install")
-        assert response.status_code == 200
+        schema = {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "engine": {"type": "string"},
+                    "name": {"type": "string"},
+                    "version": {"type": "string"}
+                },
+                "required": ["engine", "name", "version"]
+            }
+        }
 
-    @pytest.mark.asyncio
-    async def test_engines_install_uninstall_llamacpp_with_only_version_should_be_failed(self):
-        # install first
-        data = {"variant": "mac-arm64"}
-        install_response = requests.post(
-            "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data
-        )
-        await wait_for_websocket_download_success_event(timeout=120)
-        assert install_response.status_code == 200
+        # Validate response schema
+        jsonschema.validate(instance=json_data, schema=schema)
 
-        data = {"version": "v0.1.35"}
-        response = requests.delete(
-            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
-        )
-        assert response.status_code == 400
-        assert response.json()["message"] == "No variant provided"
-
-    @pytest.mark.asyncio
-    async def test_engines_install_uninstall_llamacpp_with_variant_should_be_successful(self):
-        # install first
-        data = {"variant": "mac-arm64"}
-        install_response = requests.post(
-            "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data
+        assert json_data[0]["engine"] == engine
+        assert json_data[0]["version"] == version
+        assert json_data[0]["name"] == name
+
+        delete_url = f"http://localhost:3928/v1/engines/{engine}/install"
+        delete_response = requests.delete(
+            delete_url, json=data
         )
-        await wait_for_websocket_download_success_event(timeout=120)
-        assert install_response.status_code == 200
+        with open("e2e-test/response_engine_uninstall.json", "w") as file:
+            json.dump(delete_response.json(), file, indent=4)
+        assert delete_response.status_code ==200
+        assert delete_response.json()["message"] == "Engine llama-cpp uninstalled successfully!"
 
-        response = requests.delete("http://127.0.0.1:3928/v1/engines/llama-cpp/install")
-        assert response.status_code == 200
+        get_url = f"http://localhost:3928/v1/engines/{engine}"
+        get_response = requests.get(get_url)
+        assert len(get_response.json()) == 0
 
-    def test_engines_install_uninstall_llamacpp_with_specific_variant_and_version_should_be_successful(
-        self,
-    ):
-        data = {"variant": "mac-arm64", "version": "v0.1.35"}
-        # install first
-        install_response = requests.post(
-            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
-        )
-        assert install_response.status_code == 200
 
-        response = requests.delete(
-            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
-        )
+    def test_engines_get_llamacpp_release_list(self):
+        engine= "llama-cpp"
+
+        get_url = f"http://localhost:3928/v1/engines/{engine}/releases"
+        response = requests.get(get_url)
         assert response.status_code == 200
 
+        json_data = response.json()
+        with open("e2e-test/response_engine_release.json", "w") as file:
+            json.dump(json_data, file, indent=4)
+
+        schema = {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "draft": {"type": "boolean"},
+                    "name": {"type": "string"},
+                    "prerelease": {"type": "boolean"},
+                    "published_at": {"type": "string", "format": "date-time"},
+                    "url": {"type": "string", "format": "uri"}
+                },
+                "required": ["draft", "name", "prerelease", "published_at", "url"]
+            }
+        }
+
+        jsonschema.validate(instance=json_data, schema=schema)
+
+    # def test_engines_install_llamacpp_specific_version_and_null_variant(self):
+    #     data = {"version": "v0.1.35-27.10.24"}
+    #     response = requests.post(
+    #         "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+    #     )
+    #     assert response.status_code == 200
+    
+    # # engines uninstall
+    # @pytest.mark.asyncio
+    # async def test_engines_install_uninstall_llamacpp_should_be_successful(self):
+    #     response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install")
+    #     assert response.status_code == 200
+    #     await wait_for_websocket_download_success_event(timeout=None)
+    #     time.sleep(30)
+
+    #     response = requests.delete("http://localhost:3928/v1/engines/llama-cpp/install")
+    #     assert response.status_code == 200
+
+    # @pytest.mark.asyncio
+    # async def test_engines_install_uninstall_llamacpp_with_only_version_should_be_failed(self):
+    #     # install first
+    #     data = {"variant": "mac-arm64"}
+    #     install_response = requests.post(
+    #         "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data
+    #     )
+    #     await wait_for_websocket_download_success_event(timeout=120)
+    #     assert install_response.status_code == 200
+
+    #     data = {"version": "v0.1.35"}
+    #     response = requests.delete(
+    #         "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+    #     )
+    #     assert response.status_code == 400
+    #     assert response.json()["message"] == "No variant provided"
+
+    # @pytest.mark.asyncio
+    # async def test_engines_install_uninstall_llamacpp_with_variant_should_be_successful(self):
+    #     # install first
+    #     data = {"variant": "mac-arm64"}
+    #     install_response = requests.post(
+    #         "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data
+    #     )
+    #     await wait_for_websocket_download_success_event(timeout=120)
+    #     assert install_response.status_code == 200
+
+    #     response = requests.delete("http://127.0.0.1:3928/v1/engines/llama-cpp/install")
+    #     assert response.status_code == 200
+
+    # def test_engines_install_uninstall_llamacpp_with_specific_variant_and_version_should_be_successful(
+    #     self,
+    # ):
+    #     data = {"variant": "mac-arm64", "version": "v0.1.35"}
+    #     # install first
+    #     install_response = requests.post(
+    #         "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+    #     )
+    #     assert install_response.status_code == 200
+
+    #     response = requests.delete(
+    #         "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+    #     )
+    #     assert response.status_code == 200
+
     
\ No newline at end of file

From 66a5cc7fe12d787edf7f984e3ac564e48bec82cc Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Sat, 15 Feb 2025 23:54:15 +0700
Subject: [PATCH 02/23] test: add missing .py to e2e runner path in CI (try 2)

---
 .github/workflows/cortex-cpp-quality-gate.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index ec8e1eae2..235eb4119 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -131,7 +131,7 @@ jobs:
           cp build/cortex build/cortex-beta
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/main-harry
+          python e2e-test/main-harry.py
           rm build/cortex-nightly
           rm build/cortex-beta
         env:
@@ -145,7 +145,7 @@ jobs:
           cp build/cortex.exe build/cortex-beta.exe
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/main-harry
+          python e2e-test/main-harry.py
           rm build/cortex-nightly.exe
           rm build/cortex-beta.exe
         env:

From 24c3f2d42a92d3bd43f99e6bfb20d08ff282765e Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Thu, 20 Feb 2025 13:07:40 +0700
Subject: [PATCH 03/23] test: reorganize folder

---
 .../e2e-test/api/engines/test_api_engine.py   | 100 +++++++++
 .../test_api_engine_install_nightly.py        |   2 +-
 .../{ => api/engines}/test_api_engine_list.py |   2 +-
 .../engines}/test_api_engine_update.py        |   2 +-
 .../test_api_cortexso_hub_llamacpp_engine.py  |   2 +-
 .../{ => api/model}/test_api_docker.py        |   2 +-
 .../{ => api/model}/test_api_model.py         |   2 +-
 .../{ => api/model}/test_api_model_import.py  |   2 +-
 .../{ => cli/common}/test_cli_server_start.py |   4 +-
 .../{ => cli/common}/test_cortex_update.py    |   2 +-
 .../common}/test_create_log_folder.py         |   4 +-
 .../{ => cli/engines}/test_cli_engine_get.py  |   2 +-
 .../engines}/test_cli_engine_install.py       |   2 +-
 .../test_cli_engine_install_nightly.py        |   2 +-
 .../{ => cli/engines}/test_cli_engine_list.py |   2 +-
 .../engines}/test_cli_engine_uninstall.py     |   2 +-
 .../{ => cli/model}/test_cli_model.py         |   2 +-
 .../{ => cli/model}/test_cli_model_import.py  |   4 +-
 ..._cli_model_pull_cortexso_with_selection.py |   2 +-
 .../test_cli_model_pull_from_cortexso.py      |   2 +-
 ..._cli_model_pull_hugging_face_repository.py |   2 +-
 .../e2e-test/cortex-llamacpp-e2e-nightly.py   |  22 --
 engine/e2e-test/main-harry.py                 |   6 -
 engine/e2e-test/main.py                       |  22 --
 .../runner/cortex-llamacpp-e2e-nightly.py     |  36 ++++
 engine/e2e-test/runner/main.py                |  36 ++++
 engine/e2e-test/test_api_engine.py            | 194 ------------------
 engine/e2e-test/utils/logger.py               |  15 ++
 engine/e2e-test/{ => utils}/test_runner.py    |   0
 29 files changed, 210 insertions(+), 267 deletions(-)
 create mode 100644 engine/e2e-test/api/engines/test_api_engine.py
 rename engine/e2e-test/{ => api/engines}/test_api_engine_install_nightly.py (93%)
 rename engine/e2e-test/{ => api/engines}/test_api_engine_list.py (91%)
 rename engine/e2e-test/{ => api/engines}/test_api_engine_update.py (97%)
 rename engine/e2e-test/{ => api/hub}/test_api_cortexso_hub_llamacpp_engine.py (99%)
 rename engine/e2e-test/{ => api/model}/test_api_docker.py (97%)
 rename engine/e2e-test/{ => api/model}/test_api_model.py (99%)
 rename engine/e2e-test/{ => api/model}/test_api_model_import.py (97%)
 rename engine/e2e-test/{ => cli/common}/test_cli_server_start.py (84%)
 rename engine/e2e-test/{ => cli/common}/test_cortex_update.py (94%)
 rename engine/e2e-test/{ => cli/common}/test_create_log_folder.py (90%)
 rename engine/e2e-test/{ => cli/engines}/test_cli_engine_get.py (97%)
 rename engine/e2e-test/{ => cli/engines}/test_cli_engine_install.py (98%)
 rename engine/e2e-test/{ => cli/engines}/test_cli_engine_install_nightly.py (97%)
 rename engine/e2e-test/{ => cli/engines}/test_cli_engine_list.py (95%)
 rename engine/e2e-test/{ => cli/engines}/test_cli_engine_uninstall.py (96%)
 rename engine/e2e-test/{ => cli/model}/test_cli_model.py (98%)
 rename engine/e2e-test/{ => cli/model}/test_cli_model_import.py (89%)
 rename engine/e2e-test/{ => cli/model}/test_cli_model_pull_cortexso_with_selection.py (94%)
 rename engine/e2e-test/{ => cli/model}/test_cli_model_pull_from_cortexso.py (95%)
 rename engine/e2e-test/{ => cli/model}/test_cli_model_pull_hugging_face_repository.py (96%)
 delete mode 100644 engine/e2e-test/cortex-llamacpp-e2e-nightly.py
 delete mode 100644 engine/e2e-test/main-harry.py
 delete mode 100644 engine/e2e-test/main.py
 create mode 100644 engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
 create mode 100644 engine/e2e-test/runner/main.py
 delete mode 100644 engine/e2e-test/test_api_engine.py
 create mode 100644 engine/e2e-test/utils/logger.py
 rename engine/e2e-test/{ => utils}/test_runner.py (100%)

diff --git a/engine/e2e-test/api/engines/test_api_engine.py b/engine/e2e-test/api/engines/test_api_engine.py
new file mode 100644
index 000000000..aa491caf7
--- /dev/null
+++ b/engine/e2e-test/api/engines/test_api_engine.py
@@ -0,0 +1,100 @@
+import pytest
+import requests
+import time
+from utils.test_runner import (
+    start_server,
+    stop_server,
+    wait_for_websocket_download_success_event,
+)
+
+class TestApiEngine:
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
+    
+    # engines get
+    def test_engines_get_llamacpp_should_be_successful(self):
+        response = requests.get("http://localhost:3928/engines/llama-cpp")
+        assert response.status_code == 200
+        
+    # engines install
+    def test_engines_install_llamacpp_specific_version_and_variant(self):
+        data = {"version": "v0.1.40-b4354", "variant": "linux-amd64-avx-cuda-11-7"}
+        response = requests.post(
+            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+        )
+        assert response.status_code == 200
+
+    def test_engines_install_llamacpp_specific_version_and_null_variant(self):
+        data = {"version": "v0.1.40-b4354"}
+        response = requests.post(
+            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+        )
+        assert response.status_code == 200
+    
+    # engines uninstall
+    @pytest.mark.asyncio
+    async def test_engines_install_uninstall_llamacpp_should_be_successful(self):
+        response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install")
+        assert response.status_code == 200
+        await wait_for_websocket_download_success_event(timeout=None)
+        time.sleep(30)
+
+        response = requests.delete("http://localhost:3928/v1/engines/llama-cpp/install")
+        assert response.status_code == 200
+
+    @pytest.mark.asyncio
+    async def test_engines_install_uninstall_llamacpp_with_only_version_should_be_failed(self):
+        # install first
+        data = {"variant": "mac-arm64"}
+        install_response = requests.post(
+            "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data
+        )
+        await wait_for_websocket_download_success_event(timeout=120)
+        assert install_response.status_code == 200
+
+        data = {"version": "v0.1.35"}
+        response = requests.delete(
+            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+        )
+        assert response.status_code == 400
+        assert response.json()["message"] == "No variant provided"
+
+    @pytest.mark.asyncio
+    async def test_engines_install_uninstall_llamacpp_with_variant_should_be_successful(self):
+        # install first
+        data = {"variant": "mac-arm64"}
+        install_response = requests.post(
+            "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data
+        )
+        await wait_for_websocket_download_success_event(timeout=120)
+        assert install_response.status_code == 200
+
+        response = requests.delete("http://127.0.0.1:3928/v1/engines/llama-cpp/install")
+        assert response.status_code == 200
+
+    def test_engines_install_uninstall_llamacpp_with_specific_variant_and_version_should_be_successful(
+        self,
+    ):
+        data = {"variant": "mac-arm64", "version": "v0.1.35"}
+        # install first
+        install_response = requests.post(
+            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+        )
+        assert install_response.status_code == 200
+
+        response = requests.delete(
+            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+        )
+        assert response.status_code == 200
+
+    
\ No newline at end of file
diff --git a/engine/e2e-test/test_api_engine_install_nightly.py b/engine/e2e-test/api/engines/test_api_engine_install_nightly.py
similarity index 93%
rename from engine/e2e-test/test_api_engine_install_nightly.py
rename to engine/e2e-test/api/engines/test_api_engine_install_nightly.py
index de4914c28..34fda2d18 100644
--- a/engine/e2e-test/test_api_engine_install_nightly.py
+++ b/engine/e2e-test/api/engines/test_api_engine_install_nightly.py
@@ -1,6 +1,6 @@
 import pytest
 import requests
-from test_runner import start_server, stop_server, get_latest_pre_release_tag
+from utils.test_runner import start_server, stop_server, get_latest_pre_release_tag
 
 latest_pre_release_tag = get_latest_pre_release_tag("janhq", "cortex.llamacpp")
 
diff --git a/engine/e2e-test/test_api_engine_list.py b/engine/e2e-test/api/engines/test_api_engine_list.py
similarity index 91%
rename from engine/e2e-test/test_api_engine_list.py
rename to engine/e2e-test/api/engines/test_api_engine_list.py
index f149f1450..10346c988 100644
--- a/engine/e2e-test/test_api_engine_list.py
+++ b/engine/e2e-test/api/engines/test_api_engine_list.py
@@ -1,6 +1,6 @@
 import pytest
 import requests
-from test_runner import start_server, stop_server
+from utils.test_runner import start_server, stop_server
 
 
 class TestApiEngineList:
diff --git a/engine/e2e-test/test_api_engine_update.py b/engine/e2e-test/api/engines/test_api_engine_update.py
similarity index 97%
rename from engine/e2e-test/test_api_engine_update.py
rename to engine/e2e-test/api/engines/test_api_engine_update.py
index 23939f038..bcc371406 100644
--- a/engine/e2e-test/test_api_engine_update.py
+++ b/engine/e2e-test/api/engines/test_api_engine_update.py
@@ -1,6 +1,6 @@
 import pytest
 import requests
-from test_runner import (
+from utils.test_runner import (
     start_server,
     stop_server,
     wait_for_websocket_download_success_event,
diff --git a/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py b/engine/e2e-test/api/hub/test_api_cortexso_hub_llamacpp_engine.py
similarity index 99%
rename from engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
rename to engine/e2e-test/api/hub/test_api_cortexso_hub_llamacpp_engine.py
index 9aecd3654..7a3c2e232 100644
--- a/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
+++ b/engine/e2e-test/api/hub/test_api_cortexso_hub_llamacpp_engine.py
@@ -4,7 +4,7 @@
 import yaml
 
 from pathlib import Path
-from test_runner import (
+from utils.test_runner import (
     run,
     start_server,
     stop_server,
diff --git a/engine/e2e-test/test_api_docker.py b/engine/e2e-test/api/model/test_api_docker.py
similarity index 97%
rename from engine/e2e-test/test_api_docker.py
rename to engine/e2e-test/api/model/test_api_docker.py
index b46b1f782..7276f2078 100644
--- a/engine/e2e-test/test_api_docker.py
+++ b/engine/e2e-test/api/model/test_api_docker.py
@@ -1,6 +1,6 @@
 import pytest
 import requests
-from test_runner import wait_for_websocket_download_success_event
+from utils.test_runner import wait_for_websocket_download_success_event
 
 repo_branches = ["tinyllama:1b-gguf"]
 
diff --git a/engine/e2e-test/test_api_model.py b/engine/e2e-test/api/model/test_api_model.py
similarity index 99%
rename from engine/e2e-test/test_api_model.py
rename to engine/e2e-test/api/model/test_api_model.py
index 8f2e4b07a..dd3a0fc65 100644
--- a/engine/e2e-test/test_api_model.py
+++ b/engine/e2e-test/api/model/test_api_model.py
@@ -1,7 +1,7 @@
 import pytest
 import requests
 import time
-from test_runner import (
+from utils.test_runner import (
     run,
     start_server,
     stop_server,
diff --git a/engine/e2e-test/test_api_model_import.py b/engine/e2e-test/api/model/test_api_model_import.py
similarity index 97%
rename from engine/e2e-test/test_api_model_import.py
rename to engine/e2e-test/api/model/test_api_model_import.py
index 7efbd52da..46a22e59e 100644
--- a/engine/e2e-test/test_api_model_import.py
+++ b/engine/e2e-test/api/model/test_api_model_import.py
@@ -1,6 +1,6 @@
 import pytest
 import requests
-from test_runner import start_server, stop_server
+from utils.test_runner import start_server, stop_server
 
 class TestApiModelImport:
     @pytest.fixture(autouse=True)
diff --git a/engine/e2e-test/test_cli_server_start.py b/engine/e2e-test/cli/common/test_cli_server_start.py
similarity index 84%
rename from engine/e2e-test/test_cli_server_start.py
rename to engine/e2e-test/cli/common/test_cli_server_start.py
index abe236cd0..549191458 100644
--- a/engine/e2e-test/test_cli_server_start.py
+++ b/engine/e2e-test/cli/common/test_cli_server_start.py
@@ -1,8 +1,8 @@
 import platform
 import os
 import pytest, requests
-from test_runner import run
-from test_runner import start_server, stop_server
+from utils.test_runner import run
+from utils.test_runner import start_server, stop_server
 
 
 class TestCliServerStart:
diff --git a/engine/e2e-test/test_cortex_update.py b/engine/e2e-test/cli/common/test_cortex_update.py
similarity index 94%
rename from engine/e2e-test/test_cortex_update.py
rename to engine/e2e-test/cli/common/test_cortex_update.py
index 8f6f8d7f8..7bcbdba5c 100644
--- a/engine/e2e-test/test_cortex_update.py
+++ b/engine/e2e-test/cli/common/test_cortex_update.py
@@ -2,7 +2,7 @@
 import tempfile
 
 import pytest
-from test_runner import run
+from utils.test_runner import run
 
 
 class TestCortexUpdate:
diff --git a/engine/e2e-test/test_create_log_folder.py b/engine/e2e-test/cli/common/test_create_log_folder.py
similarity index 90%
rename from engine/e2e-test/test_create_log_folder.py
rename to engine/e2e-test/cli/common/test_create_log_folder.py
index 5dbbd521c..7ad6ee452 100644
--- a/engine/e2e-test/test_create_log_folder.py
+++ b/engine/e2e-test/cli/common/test_create_log_folder.py
@@ -2,8 +2,8 @@
 import os
 from pathlib import Path
 import pytest, requests, shutil
-from test_runner import run
-from test_runner import start_server, stop_server
+from utils.test_runner import run
+from utils.test_runner import start_server, stop_server
 
 
 class TestCreateLogFolder:
diff --git a/engine/e2e-test/test_cli_engine_get.py b/engine/e2e-test/cli/engines/test_cli_engine_get.py
similarity index 97%
rename from engine/e2e-test/test_cli_engine_get.py
rename to engine/e2e-test/cli/engines/test_cli_engine_get.py
index c26bedfae..b17ea7cc8 100644
--- a/engine/e2e-test/test_cli_engine_get.py
+++ b/engine/e2e-test/cli/engines/test_cli_engine_get.py
@@ -1,7 +1,7 @@
 import platform
 
 import pytest
-from test_runner import run, start_server, stop_server
+from utils.test_runner import run, start_server, stop_server
 
 
 class TestCliEngineGet:
diff --git a/engine/e2e-test/test_cli_engine_install.py b/engine/e2e-test/cli/engines/test_cli_engine_install.py
similarity index 98%
rename from engine/e2e-test/test_cli_engine_install.py
rename to engine/e2e-test/cli/engines/test_cli_engine_install.py
index a998f3183..3ffe659ea 100644
--- a/engine/e2e-test/test_cli_engine_install.py
+++ b/engine/e2e-test/cli/engines/test_cli_engine_install.py
@@ -3,7 +3,7 @@
 
 import pytest
 import requests
-from test_runner import run, start_server, stop_server
+from utils.test_runner import run, start_server, stop_server
 
 
 class TestCliEngineInstall:
diff --git a/engine/e2e-test/test_cli_engine_install_nightly.py b/engine/e2e-test/cli/engines/test_cli_engine_install_nightly.py
similarity index 97%
rename from engine/e2e-test/test_cli_engine_install_nightly.py
rename to engine/e2e-test/cli/engines/test_cli_engine_install_nightly.py
index 8c66c284c..e4e46ecfb 100644
--- a/engine/e2e-test/test_cli_engine_install_nightly.py
+++ b/engine/e2e-test/cli/engines/test_cli_engine_install_nightly.py
@@ -3,7 +3,7 @@
 
 import pytest
 import requests
-from test_runner import run, start_server, stop_server, get_latest_pre_release_tag
+from utils.test_runner import run, start_server, stop_server, get_latest_pre_release_tag
 
 latest_pre_release_tag = get_latest_pre_release_tag("janhq", "cortex.llamacpp")
 
diff --git a/engine/e2e-test/test_cli_engine_list.py b/engine/e2e-test/cli/engines/test_cli_engine_list.py
similarity index 95%
rename from engine/e2e-test/test_cli_engine_list.py
rename to engine/e2e-test/cli/engines/test_cli_engine_list.py
index 6a79bb449..b2d59e855 100644
--- a/engine/e2e-test/test_cli_engine_list.py
+++ b/engine/e2e-test/cli/engines/test_cli_engine_list.py
@@ -1,7 +1,7 @@
 import platform
 
 import pytest
-from test_runner import run, start_server, stop_server
+from utils.test_runner import run, start_server, stop_server
 
 
 class TestCliEngineList:
diff --git a/engine/e2e-test/test_cli_engine_uninstall.py b/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py
similarity index 96%
rename from engine/e2e-test/test_cli_engine_uninstall.py
rename to engine/e2e-test/cli/engines/test_cli_engine_uninstall.py
index fcc5f5c73..a33a2d5cc 100644
--- a/engine/e2e-test/test_cli_engine_uninstall.py
+++ b/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py
@@ -1,6 +1,6 @@
 import pytest
 import requests
-from test_runner import (
+from utils.test_runner import (
     run,
     start_server,
     stop_server,
diff --git a/engine/e2e-test/test_cli_model.py b/engine/e2e-test/cli/model/test_cli_model.py
similarity index 98%
rename from engine/e2e-test/test_cli_model.py
rename to engine/e2e-test/cli/model/test_cli_model.py
index f6aad4ae9..63261c214 100644
--- a/engine/e2e-test/test_cli_model.py
+++ b/engine/e2e-test/cli/model/test_cli_model.py
@@ -2,7 +2,7 @@
 import requests
 import os
 from pathlib import Path
-from test_runner import (
+from utils.test_runner import (
     run,
     start_server,
     stop_server,
diff --git a/engine/e2e-test/test_cli_model_import.py b/engine/e2e-test/cli/model/test_cli_model_import.py
similarity index 89%
rename from engine/e2e-test/test_cli_model_import.py
rename to engine/e2e-test/cli/model/test_cli_model_import.py
index cf94d1a2a..e2c5010a4 100644
--- a/engine/e2e-test/test_cli_model_import.py
+++ b/engine/e2e-test/cli/model/test_cli_model_import.py
@@ -1,6 +1,6 @@
 import pytest
-from test_runner import run
-from test_runner import start_server, stop_server
+from utils.test_runner import run
+from utils.test_runner import start_server, stop_server
 
 class TestCliModelImport:
     
diff --git a/engine/e2e-test/test_cli_model_pull_cortexso_with_selection.py b/engine/e2e-test/cli/model/test_cli_model_pull_cortexso_with_selection.py
similarity index 94%
rename from engine/e2e-test/test_cli_model_pull_cortexso_with_selection.py
rename to engine/e2e-test/cli/model/test_cli_model_pull_cortexso_with_selection.py
index 8c3de8d98..d6a78843b 100644
--- a/engine/e2e-test/test_cli_model_pull_cortexso_with_selection.py
+++ b/engine/e2e-test/cli/model/test_cli_model_pull_cortexso_with_selection.py
@@ -1,4 +1,4 @@
-from test_runner import popen
+from utils.test_runner import popen
 import os
 from pathlib import Path
 
diff --git a/engine/e2e-test/test_cli_model_pull_from_cortexso.py b/engine/e2e-test/cli/model/test_cli_model_pull_from_cortexso.py
similarity index 95%
rename from engine/e2e-test/test_cli_model_pull_from_cortexso.py
rename to engine/e2e-test/cli/model/test_cli_model_pull_from_cortexso.py
index 1791e39a6..2a66ae19e 100644
--- a/engine/e2e-test/test_cli_model_pull_from_cortexso.py
+++ b/engine/e2e-test/cli/model/test_cli_model_pull_from_cortexso.py
@@ -1,5 +1,5 @@
 import pytest
-from test_runner import run
+from utils.test_runner import run
 
 
 class TestCliModelPullCortexso:
diff --git a/engine/e2e-test/test_cli_model_pull_hugging_face_repository.py b/engine/e2e-test/cli/model/test_cli_model_pull_hugging_face_repository.py
similarity index 96%
rename from engine/e2e-test/test_cli_model_pull_hugging_face_repository.py
rename to engine/e2e-test/cli/model/test_cli_model_pull_hugging_face_repository.py
index 996ac086c..49678ee7a 100644
--- a/engine/e2e-test/test_cli_model_pull_hugging_face_repository.py
+++ b/engine/e2e-test/cli/model/test_cli_model_pull_hugging_face_repository.py
@@ -1,5 +1,5 @@
 import pytest
-from test_runner import popen
+from utils.test_runner import popen
 
 
 class TestCliModelPullHuggingFaceRepository:
diff --git a/engine/e2e-test/cortex-llamacpp-e2e-nightly.py b/engine/e2e-test/cortex-llamacpp-e2e-nightly.py
deleted file mode 100644
index 0511277f3..000000000
--- a/engine/e2e-test/cortex-llamacpp-e2e-nightly.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import pytest
-import sys
-
-### e2e tests are expensive, have to keep engines tests in order
-from test_api_engine_list import TestApiEngineList
-from test_api_engine_install_nightly import TestApiEngineInstall
-from test_api_model import TestApiModel
-from test_api_model_import import TestApiModelImport
-
-###
-from test_cli_engine_get import TestCliEngineGet
-from test_cli_engine_install_nightly import TestCliEngineInstall
-from test_cli_engine_list import TestCliEngineList
-from test_cli_engine_uninstall import TestCliEngineUninstall
-from test_cli_model import TestCliModel
-from test_cli_server_start import TestCliServerStart
-from test_cortex_update import TestCortexUpdate
-from test_create_log_folder import TestCreateLogFolder
-from test_cli_model_import import TestCliModelImport
-
-if __name__ == "__main__":
-    sys.exit(pytest.main([__file__, "-v"]))
diff --git a/engine/e2e-test/main-harry.py b/engine/e2e-test/main-harry.py
deleted file mode 100644
index a7dcf0d36..000000000
--- a/engine/e2e-test/main-harry.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import pytest
-import sys
-from test_api_engine import TestApiEngine
-
-if __name__ == "__main__":
-    sys.exit(pytest.main([__file__, "-v"]))
diff --git a/engine/e2e-test/main.py b/engine/e2e-test/main.py
deleted file mode 100644
index e874ab3a0..000000000
--- a/engine/e2e-test/main.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import pytest
-import sys
-
-### e2e tests are expensive, have to keep engines tests in order
-from test_api_engine_list import TestApiEngineList
-from test_api_engine import TestApiEngine
-from test_api_model import TestApiModel
-from test_api_model_import import TestApiModelImport
-
-###
-from test_cli_engine_get import TestCliEngineGet
-from test_cli_engine_install import TestCliEngineInstall
-from test_cli_engine_list import TestCliEngineList
-from test_cli_engine_uninstall import TestCliEngineUninstall
-from test_cli_model import TestCliModel
-from test_cli_server_start import TestCliServerStart
-from test_cortex_update import TestCortexUpdate
-from test_create_log_folder import TestCreateLogFolder
-from test_cli_model_import import TestCliModelImport
-
-if __name__ == "__main__":
-    sys.exit(pytest.main([__file__, "-v"]))
diff --git a/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py b/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
new file mode 100644
index 000000000..193d26d36
--- /dev/null
+++ b/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
@@ -0,0 +1,36 @@
+import pytest
+import sys
+import os
+
+# Absolute path of the directory one level above this file's folder (i.e. above runner/); appended to sys.path below
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+
+
+# Add all necessary paths
+sys.path.append(PROJECT_ROOT)               
+sys.path.append(os.path.join(PROJECT_ROOT, "api/engines"))  
+sys.path.append(os.path.join(PROJECT_ROOT, "api/hub"))  
+sys.path.append(os.path.join(PROJECT_ROOT, "api/model"))  
+sys.path.append(os.path.join(PROJECT_ROOT, "cli/engines"))  
+sys.path.append(os.path.join(PROJECT_ROOT, "cli/model"))  
+sys.path.append(os.path.join(PROJECT_ROOT, "cli/common"))  
+
+### e2e tests are expensive, have to keep engines tests in order
+from api.engines.test_api_engine_list import TestApiEngineList
+from api.engines.test_api_engine_install_nightly import TestApiEngineInstall
+from api.model.test_api_model import TestApiModel
+from api.model.test_api_model_import import TestApiModelImport
+
+###
+from cli.engines.test_cli_engine_get import TestCliEngineGet
+from cli.engines.test_cli_engine_install_nightly import TestCliEngineInstall
+from cli.engines.test_cli_engine_list import TestCliEngineList
+from cli.engines.test_cli_engine_uninstall import TestCliEngineUninstall
+from cli.model.test_cli_model import TestCliModel
+from cli.model.test_cli_model_import import TestCliModelImport
+from cli.common.test_cli_server_start import TestCliServerStart
+from cli.common.test_cortex_update import TestCortexUpdate
+from cli.common.test_create_log_folder import TestCreateLogFolder
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__, "-v"]))
diff --git a/engine/e2e-test/runner/main.py b/engine/e2e-test/runner/main.py
new file mode 100644
index 000000000..7a96f1164
--- /dev/null
+++ b/engine/e2e-test/runner/main.py
@@ -0,0 +1,36 @@
+import pytest
+import sys
+import os
+
+# Absolute path of the directory one level above this file's folder (i.e. above runner/); appended to sys.path below
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+
+
+# Add all necessary paths
+sys.path.append(PROJECT_ROOT)               
+sys.path.append(os.path.join(PROJECT_ROOT, "api/engines"))  
+sys.path.append(os.path.join(PROJECT_ROOT, "api/hub"))  
+sys.path.append(os.path.join(PROJECT_ROOT, "api/model"))  
+sys.path.append(os.path.join(PROJECT_ROOT, "cli/engines"))  
+sys.path.append(os.path.join(PROJECT_ROOT, "cli/model"))  
+sys.path.append(os.path.join(PROJECT_ROOT, "cli/common"))  
+
+### e2e tests are expensive, have to keep engines tests in order
+from api.engines.test_api_engine_list import TestApiEngineList
+from api.engines.test_api_engine import TestApiEngine
+from api.model.test_api_model import TestApiModel
+from api.model.test_api_model_import import TestApiModelImport
+
+###
+from cli.engines.test_cli_engine_get import TestCliEngineGet
+from cli.engines.test_cli_engine_install import TestCliEngineInstall
+from cli.engines.test_cli_engine_list import TestCliEngineList
+from cli.engines.test_cli_engine_uninstall import TestCliEngineUninstall
+from cli.model.test_cli_model import TestCliModel
+from cli.model.test_cli_model_import import TestCliModelImport
+from cli.common.test_cli_server_start import TestCliServerStart
+from cli.common.test_cortex_update import TestCortexUpdate
+from cli.common.test_create_log_folder import TestCreateLogFolder
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__, "-v"]))
diff --git a/engine/e2e-test/test_api_engine.py b/engine/e2e-test/test_api_engine.py
deleted file mode 100644
index d2d50afe6..000000000
--- a/engine/e2e-test/test_api_engine.py
+++ /dev/null
@@ -1,194 +0,0 @@
-import pytest
-import requests
-import time
-from test_runner import (
-    start_server,
-    stop_server,
-    wait_for_websocket_download_success_event,
-)
-import json
-import jsonschema
-
-# logging.basicConfig(level=logging.INFO, force=True)  # Ensure logs show
-# logger = logging.getLogger(__name__)
-
-class TestApiEngine:
-
-    @pytest.fixture(autouse=True)
-    def setup_and_teardown(self):
-        # Setup
-        success = start_server()
-        if not success:
-            raise Exception("Failed to start server")
-
-        yield
-
-        # Teardown
-        stop_server()
-    
-    
-        
-    # # engines install
-    # def test_engines_install_llamacpp_specific_version_and_variant(self):
-    #     data = {"version": "v0.1.35-27.10.24", "variant": "linux-amd64-avx-cuda-11-7"}
-    #     response = requests.post(
-    #         "http://localhost:3928/v1/engines/llama-cpp/install", json=data
-    #     )
-    #     assert response.status_code == 200
-    #     with open("response-install.json", "w") as file:
-    #         json.dump(response.json(), file, indent=4)
-
-    # engines get
-    def test_engines_get_llamacpp_should_be_successful(self):
-        engine= "llama-cpp"
-        name= "linux-amd64-avx-cuda-11-7"
-        version= "v0.1.35-27.10.24"
-    
-        # data = {"version": version, "variant": name}
-        data = {"version": "v0.1.35-27.10.24", "variant": "linux-amd64-avx-cuda-11-7"}
-        post_url = f"http://localhost:3928/v1/engines/{engine}/install"
-        response = requests.post(
-            post_url, json=data
-        )
-        assert response.status_code == 200
-        
-        get_url = f"http://localhost:3928/v1/engines/{engine}"
-        response = requests.get(get_url)
-        count = 0
-        while len(response.json()) == 0:
-            time.sleep(1)
-            response = requests.get(get_url)
-            count += 1
-
-        json_data = response.json()
-        with open("e2e-test/response.json", "w") as file:
-            json.dump(json_data, file, indent=4)
-            # file.write(f"\nCount: {count}\n") 
-        assert response.status_code == 200
-
-        schema = {
-            "type": "array",
-            "items": {
-                "type": "object",
-                "properties": {
-                    "engine": {"type": "string"},
-                    "name": {"type": "string"},
-                    "version": {"type": "string"}
-                },
-                "required": ["engine", "name", "version"]
-            }
-        }
-
-        # Validate response schema
-        jsonschema.validate(instance=json_data, schema=schema)
-
-        assert json_data[0]["engine"] == engine
-        assert json_data[0]["version"] == version
-        assert json_data[0]["name"] == name
-
-        delete_url = f"http://localhost:3928/v1/engines/{engine}/install"
-        delete_response = requests.delete(
-            delete_url, json=data
-        )
-        with open("e2e-test/response_engine_uninstall.json", "w") as file:
-            json.dump(delete_response.json(), file, indent=4)
-        assert delete_response.status_code ==200
-        assert delete_response.json()["message"] == "Engine llama-cpp uninstalled successfully!"
-
-        get_url = f"http://localhost:3928/v1/engines/{engine}"
-        get_response = requests.get(get_url)
-        assert len(get_response.json()) == 0
-
-
-    def test_engines_get_llamacpp_release_list(self):
-        engine= "llama-cpp"
-
-        get_url = f"http://localhost:3928/v1/engines/{engine}/releases"
-        response = requests.get(get_url)
-        assert response.status_code == 200
-
-        json_data = response.json()
-        with open("e2e-test/response_engine_release.json", "w") as file:
-            json.dump(json_data, file, indent=4)
-
-        schema = {
-            "type": "array",
-            "items": {
-                "type": "object",
-                "properties": {
-                    "draft": {"type": "boolean"},
-                    "name": {"type": "string"},
-                    "prerelease": {"type": "boolean"},
-                    "published_at": {"type": "string", "format": "date-time"},
-                    "url": {"type": "string", "format": "uri"}
-                },
-                "required": ["draft", "name", "prerelease", "published_at", "url"]
-            }
-        }
-
-        jsonschema.validate(instance=json_data, schema=schema)
-
-    # def test_engines_install_llamacpp_specific_version_and_null_variant(self):
-    #     data = {"version": "v0.1.35-27.10.24"}
-    #     response = requests.post(
-    #         "http://localhost:3928/v1/engines/llama-cpp/install", json=data
-    #     )
-    #     assert response.status_code == 200
-    
-    # # engines uninstall
-    # @pytest.mark.asyncio
-    # async def test_engines_install_uninstall_llamacpp_should_be_successful(self):
-    #     response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install")
-    #     assert response.status_code == 200
-    #     await wait_for_websocket_download_success_event(timeout=None)
-    #     time.sleep(30)
-
-    #     response = requests.delete("http://localhost:3928/v1/engines/llama-cpp/install")
-    #     assert response.status_code == 200
-
-    # @pytest.mark.asyncio
-    # async def test_engines_install_uninstall_llamacpp_with_only_version_should_be_failed(self):
-    #     # install first
-    #     data = {"variant": "mac-arm64"}
-    #     install_response = requests.post(
-    #         "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data
-    #     )
-    #     await wait_for_websocket_download_success_event(timeout=120)
-    #     assert install_response.status_code == 200
-
-    #     data = {"version": "v0.1.35"}
-    #     response = requests.delete(
-    #         "http://localhost:3928/v1/engines/llama-cpp/install", json=data
-    #     )
-    #     assert response.status_code == 400
-    #     assert response.json()["message"] == "No variant provided"
-
-    # @pytest.mark.asyncio
-    # async def test_engines_install_uninstall_llamacpp_with_variant_should_be_successful(self):
-    #     # install first
-    #     data = {"variant": "mac-arm64"}
-    #     install_response = requests.post(
-    #         "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data
-    #     )
-    #     await wait_for_websocket_download_success_event(timeout=120)
-    #     assert install_response.status_code == 200
-
-    #     response = requests.delete("http://127.0.0.1:3928/v1/engines/llama-cpp/install")
-    #     assert response.status_code == 200
-
-    # def test_engines_install_uninstall_llamacpp_with_specific_variant_and_version_should_be_successful(
-    #     self,
-    # ):
-    #     data = {"variant": "mac-arm64", "version": "v0.1.35"}
-    #     # install first
-    #     install_response = requests.post(
-    #         "http://localhost:3928/v1/engines/llama-cpp/install", json=data
-    #     )
-    #     assert install_response.status_code == 200
-
-    #     response = requests.delete(
-    #         "http://localhost:3928/v1/engines/llama-cpp/install", json=data
-    #     )
-    #     assert response.status_code == 200
-
-    
\ No newline at end of file
diff --git a/engine/e2e-test/utils/logger.py b/engine/e2e-test/utils/logger.py
new file mode 100644
index 000000000..578a4f2cc
--- /dev/null
+++ b/engine/e2e-test/utils/logger.py
@@ -0,0 +1,15 @@
+import json
+import os
+
+def log_response(data, test_name):
+    """Log the data to a file named after the test."""
+    log_dir="e2e-test/logs"
+    os.makedirs(log_dir, exist_ok=True)  # Ensure log directory exists
+    file_path = os.path.join(log_dir, f"{test_name}.txt")  # Log file per test
+
+    try:
+        with open(file_path, "a", encoding="utf-8") as file:  # Append mode: earlier entries in the file are kept
+            json.dump(data, file, indent=4)
+            file.write("\n")  # Ensure a new line between entries
+    except Exception as e:  # Best-effort: a failure here is printed, not re-raised
+        print(f"Error logging response: {e}")
diff --git a/engine/e2e-test/test_runner.py b/engine/e2e-test/utils/test_runner.py
similarity index 100%
rename from engine/e2e-test/test_runner.py
rename to engine/e2e-test/utils/test_runner.py

From 08dd5dbb601b6584700b42b7f6777f2fc6d2a4b5 Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Thu, 20 Feb 2025 13:41:03 +0700
Subject: [PATCH 04/23] test: revert wfl

---
 .github/workflows/cortex-cpp-quality-gate.yml | 110 +++++++++---------
 1 file changed, 55 insertions(+), 55 deletions(-)

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index 235eb4119..4e741040a 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -1,12 +1,12 @@
 name: CI Quality Gate Cortex CPP
 
 on:
-  # pull_request:
-  #   types: [opened, synchronize, reopened, ready_for_review]
-  #   paths: ["engine/**", ".github/workflows/cortex-cpp-quality-gate.yml"]
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    paths: ["engine/**", ".github/workflows/cortex-cpp-quality-gate.yml"]
   workflow_dispatch:
-  # schedule:
-  #   - cron: '0 22 * * *'
+  schedule:
+    - cron: '0 22 * * *'
 
 env:
   LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
@@ -218,58 +218,58 @@ jobs:
           AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
           AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
 
-  # build-docker-and-test:
-  #   runs-on: ubuntu-24-04-docker
-  #   steps:
-  #     - name: Getting the repo
-  #       uses: actions/checkout@v3
-  #       with:
-  #         submodules: 'recursive'
+  build-docker-and-test:
+    runs-on: ubuntu-24-04-docker
+    steps:
+      - name: Getting the repo
+        uses: actions/checkout@v3
+        with:
+          submodules: 'recursive'
       
-  #     - name: Run Docker
-  #       if: github.event_name != 'schedule'
-  #       run: |
-  #         docker build \
-  #           --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \
-  #           --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \
-  #           --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \
-  #           --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \
-  #           -t menloltd/cortex:test -f docker/Dockerfile.cache .
-  #         docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
-  #         sleep 20
+      - name: Run Docker
+        if: github.event_name != 'schedule'
+        run: |
+          docker build \
+            --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \
+            --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \
+            --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \
+            --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \
+            -t menloltd/cortex:test -f docker/Dockerfile.cache .
+          docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
+          sleep 20
 
-  #     - name: Run Docker
-  #       if: github.event_name == 'schedule'
-  #       run: |
-  #         latest_prerelease=$(curl -s https://api.github.com/repos/cortexcpp/cortex.cpp/releases | jq -r '.[] | select(.prerelease == true) | .tag_name' | head -n 1)
-  #         echo "cortex.llamacpp latest release: $latest_prerelease"
-  #         docker build \
-  #           --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \
-  #           --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \
-  #           --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \
-  #           --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \
-  #           --build-arg CORTEX_CPP_VERSION="${latest_prerelease}" \
-  #           -t menloltd/cortex:test -f docker/Dockerfile.cache .
-  #         docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
-  #         sleep 20
+      - name: Run Docker
+        if: github.event_name == 'schedule'
+        run: |
+          latest_prerelease=$(curl -s https://api.github.com/repos/cortexcpp/cortex.cpp/releases | jq -r '.[] | select(.prerelease == true) | .tag_name' | head -n 1)
+          echo "cortex.llamacpp latest release: $latest_prerelease"
+          docker build \
+            --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \
+            --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \
+            --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \
+            --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \
+            --build-arg CORTEX_CPP_VERSION="${latest_prerelease}" \
+            -t menloltd/cortex:test -f docker/Dockerfile.cache .
+          docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
+          sleep 20
 
-  #     - name: use python
-  #       uses: actions/setup-python@v5
-  #       with:
-  #         python-version: "3.10"
+      - name: use python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
 
-  #     - name: Run e2e tests
-  #       run: |
-  #         cd engine
-  #         python -m pip install --upgrade pip
-  #         python -m pip install -r e2e-test/requirements.txt
-  #         pytest e2e-test/test_api_docker.py
+      - name: Run e2e tests
+        run: |
+          cd engine
+          python -m pip install --upgrade pip
+          python -m pip install -r e2e-test/requirements.txt
+          pytest e2e-test/test_api_docker.py
 
-  #     - name: Run Docker
-  #       continue-on-error: true
-  #       if: always()  
-  #       run: |
-  #         docker logs cortex
-  #         docker stop cortex
-  #         docker rm cortex
-  #         echo "y\n" | docker system prune -af
+      - name: Run Docker
+        continue-on-error: true
+        if: always()  
+        run: |
+          docker logs cortex
+          docker stop cortex
+          docker rm cortex
+          echo "y\n" | docker system prune -af

From f656c68214b582d5733cb420bd0b136a73f282d5 Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Thu, 20 Feb 2025 13:43:56 +0700
Subject: [PATCH 05/23] test: update log folder

---
 engine/e2e-test/utils/logger.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/engine/e2e-test/utils/logger.py b/engine/e2e-test/utils/logger.py
index 578a4f2cc..3417f07e2 100644
--- a/engine/e2e-test/utils/logger.py
+++ b/engine/e2e-test/utils/logger.py
@@ -3,7 +3,7 @@
 
 def log_response(data, test_name):
     """Log the data to a file named after the test."""
-    log_dir="e2e-test/logs"
+    log_dir="logs"
     os.makedirs(log_dir, exist_ok=True)  # Ensure log directory exists
     file_path = os.path.join(log_dir, f"{test_name}.txt")  # Log file per test
 

From 6e58dec740494e90e7d0cbdf5c7c526e252e7618 Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Thu, 20 Feb 2025 14:27:00 +0700
Subject: [PATCH 06/23] test: move docker test to root

---
 engine/e2e-test/{api/model => }/test_api_docker.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename engine/e2e-test/{api/model => }/test_api_docker.py (100%)

diff --git a/engine/e2e-test/api/model/test_api_docker.py b/engine/e2e-test/test_api_docker.py
similarity index 100%
rename from engine/e2e-test/api/model/test_api_docker.py
rename to engine/e2e-test/test_api_docker.py

From 4249234dbcb3e30d9fa52e199d6448e73f86215f Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Fri, 21 Feb 2025 10:24:51 +0700
Subject: [PATCH 07/23] test: revert change wfl

---
 .github/workflows/cortex-cpp-quality-gate.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index d98631ef7..87be2cf47 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -168,7 +168,7 @@ jobs:
           cp build/cortex build/cortex-beta
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/main-harry.py
+          python e2e-test/main.py
           rm build/cortex-nightly
           rm build/cortex-beta
         env:
@@ -182,7 +182,7 @@ jobs:
           cp build/cortex.exe build/cortex-beta.exe
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/main-harry.py
+          python e2e-test/main.py
           rm build/cortex-nightly.exe
           rm build/cortex-beta.exe
         env:
@@ -196,7 +196,7 @@ jobs:
           cp build/cortex build/cortex-beta
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/main-harry.py
+          python e2e-test/cortex-llamacpp-e2e-nightly.py
           rm build/cortex-nightly
           rm build/cortex-beta
         env:
@@ -210,7 +210,7 @@ jobs:
           cp build/cortex.exe build/cortex-beta.exe
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/main-harry.py
+          python e2e-test/cortex-llamacpp-e2e-nightly.py
           rm build/cortex-nightly.exe
           rm build/cortex-beta.exe
         env:

From 1b5d382e397dc0b1ba6fc55165dc03c2390e48a8 Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Fri, 21 Feb 2025 10:30:50 +0700
Subject: [PATCH 08/23] test: update path in wfl

---
 .github/workflows/cortex-cpp-quality-gate.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index 87be2cf47..68d4d9c09 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -168,7 +168,7 @@ jobs:
           cp build/cortex build/cortex-beta
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/main.py
+          python e2e-test/runner/main.py
           rm build/cortex-nightly
           rm build/cortex-beta
         env:
@@ -182,7 +182,7 @@ jobs:
           cp build/cortex.exe build/cortex-beta.exe
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/main.py
+          python e2e-test/runner/main.py
           rm build/cortex-nightly.exe
           rm build/cortex-beta.exe
         env:
@@ -196,7 +196,7 @@ jobs:
           cp build/cortex build/cortex-beta
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/cortex-llamacpp-e2e-nightly.py
+          python e2e-test/runner/cortex-llamacpp-e2e-nightly.py
           rm build/cortex-nightly
           rm build/cortex-beta
         env:
@@ -210,7 +210,7 @@ jobs:
           cp build/cortex.exe build/cortex-beta.exe
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/cortex-llamacpp-e2e-nightly.py
+          python e2e-test/runner/cortex-llamacpp-e2e-nightly.py
           rm build/cortex-nightly.exe
           rm build/cortex-beta.exe
         env:
@@ -443,7 +443,7 @@ jobs:
           cp build/cortex build/cortex-beta
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/main.py
+          python e2e-test/runner/main.py
           rm build/cortex-nightly
           rm build/cortex-beta
         env:
@@ -457,7 +457,7 @@ jobs:
           cp build/cortex.exe build/cortex-beta.exe
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/main.py
+          python e2e-test/runner/main.py
           rm build/cortex-nightly.exe
           rm build/cortex-beta.exe
         env:
@@ -471,7 +471,7 @@ jobs:
           cp build/cortex build/cortex-beta
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/cortex-llamacpp-e2e-nightly.py
+          python e2e-test/runner/cortex-llamacpp-e2e-nightly.py
           rm build/cortex-nightly
           rm build/cortex-beta
         env:
@@ -485,7 +485,7 @@ jobs:
           cp build/cortex.exe build/cortex-beta.exe
           python -m pip install --upgrade pip
           python -m pip install -r e2e-test/requirements.txt
-          python e2e-test/cortex-llamacpp-e2e-nightly.py
+          python e2e-test/runner/cortex-llamacpp-e2e-nightly.py
           rm build/cortex-nightly.exe
           rm build/cortex-beta.exe
         env:

From 4fe771aa43c88478e12cb50a5b5ae3528d9dc1c3 Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Fri, 21 Feb 2025 21:41:24 +0700
Subject: [PATCH 09/23] test: update path, add-update utils

---
 .../runner/cortex-llamacpp-e2e-nightly.py     | 26 +++++++++----------
 engine/e2e-test/runner/main.py                | 26 +++++++++----------
 .../test_api_cortexso_hub_llamacpp_engine.py  |  0
 engine/e2e-test/utils/assertion.py            |  3 +++
 engine/e2e-test/utils/logger.py               |  2 +-
 5 files changed, 30 insertions(+), 27 deletions(-)
 rename engine/e2e-test/{api/hub => }/test_api_cortexso_hub_llamacpp_engine.py (100%)
 create mode 100644 engine/e2e-test/utils/assertion.py

diff --git a/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py b/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
index 193d26d36..711f9e44c 100644
--- a/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
+++ b/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
@@ -16,21 +16,21 @@
 sys.path.append(os.path.join(PROJECT_ROOT, "cli/common"))  
 
 ### e2e tests are expensive, have to keep engines tests in order
-from api.engines.test_api_engine_list import TestApiEngineList
-from api.engines.test_api_engine_install_nightly import TestApiEngineInstall
-from api.model.test_api_model import TestApiModel
-from api.model.test_api_model_import import TestApiModelImport
+from test_api_get_list_engine import TestApiEngineList
+from test_api_engine_install_nightly import TestApiEngineInstall
+from test_api_model import TestApiModel
+from test_api_model_import import TestApiModelImport
 
 ###
-from cli.engines.test_cli_engine_get import TestCliEngineGet
-from cli.engines.test_cli_engine_install_nightly import TestCliEngineInstall
-from cli.engines.test_cli_engine_list import TestCliEngineList
-from cli.engines.test_cli_engine_uninstall import TestCliEngineUninstall
-from cli.model.test_cli_model import TestCliModel
-from cli.model.test_cli_model_import import TestCliModelImport
-from cli.common.test_cli_server_start import TestCliServerStart
-from cli.common.test_cortex_update import TestCortexUpdate
-from cli.common.test_create_log_folder import TestCreateLogFolder
+from test_cli_engine_get import TestCliEngineGet
+from test_cli_engine_install_nightly import TestCliEngineInstall
+from test_cli_engine_list import TestCliEngineList
+from test_cli_engine_uninstall import TestCliEngineUninstall
+from test_cli_model import TestCliModel
+from test_cli_model_import import TestCliModelImport
+from test_cli_server_start import TestCliServerStart
+from test_cortex_update import TestCortexUpdate
+from test_create_log_folder import TestCreateLogFolder
 
 if __name__ == "__main__":
     sys.exit(pytest.main([__file__, "-v"]))
diff --git a/engine/e2e-test/runner/main.py b/engine/e2e-test/runner/main.py
index 7a96f1164..2d7dcc1cb 100644
--- a/engine/e2e-test/runner/main.py
+++ b/engine/e2e-test/runner/main.py
@@ -16,21 +16,21 @@
 sys.path.append(os.path.join(PROJECT_ROOT, "cli/common"))  
 
 ### e2e tests are expensive, have to keep engines tests in order
-from api.engines.test_api_engine_list import TestApiEngineList
-from api.engines.test_api_engine import TestApiEngine
-from api.model.test_api_model import TestApiModel
-from api.model.test_api_model_import import TestApiModelImport
+from test_api_get_list_engine import TestApiEngineList
+from test_api_engine import TestApiEngine
+from test_api_model import TestApiModel
+from test_api_model_import import TestApiModelImport
 
 ###
-from cli.engines.test_cli_engine_get import TestCliEngineGet
-from cli.engines.test_cli_engine_install import TestCliEngineInstall
-from cli.engines.test_cli_engine_list import TestCliEngineList
-from cli.engines.test_cli_engine_uninstall import TestCliEngineUninstall
-from cli.model.test_cli_model import TestCliModel
-from cli.model.test_cli_model_import import TestCliModelImport
-from cli.common.test_cli_server_start import TestCliServerStart
-from cli.common.test_cortex_update import TestCortexUpdate
-from cli.common.test_create_log_folder import TestCreateLogFolder
+from test_cli_engine_get import TestCliEngineGet
+from test_cli_engine_install import TestCliEngineInstall
+from test_cli_engine_list import TestCliEngineList
+from test_cli_engine_uninstall import TestCliEngineUninstall
+from test_cli_model import TestCliModel
+from test_cli_model_import import TestCliModelImport
+from test_cli_server_start import TestCliServerStart
+from test_cortex_update import TestCortexUpdate
+from test_create_log_folder import TestCreateLogFolder
 
 if __name__ == "__main__":
     sys.exit(pytest.main([__file__, "-v"]))
diff --git a/engine/e2e-test/api/hub/test_api_cortexso_hub_llamacpp_engine.py b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
similarity index 100%
rename from engine/e2e-test/api/hub/test_api_cortexso_hub_llamacpp_engine.py
rename to engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
diff --git a/engine/e2e-test/utils/assertion.py b/engine/e2e-test/utils/assertion.py
new file mode 100644
index 000000000..067335732
--- /dev/null
+++ b/engine/e2e-test/utils/assertion.py
@@ -0,0 +1,3 @@
+def assert_equal(actual, expected):
+    """Custom assertion to compare actual and expected values."""
+    assert actual == expected, f"Assertion failed: Expected {expected}, but got {actual}"
diff --git a/engine/e2e-test/utils/logger.py b/engine/e2e-test/utils/logger.py
index 3417f07e2..578a4f2cc 100644
--- a/engine/e2e-test/utils/logger.py
+++ b/engine/e2e-test/utils/logger.py
@@ -3,7 +3,7 @@
 
 def log_response(data, test_name):
     """Log the data to a file named after the test."""
-    log_dir="logs"
+    log_dir="e2e-test/logs"
     os.makedirs(log_dir, exist_ok=True)  # Ensure log directory exists
     file_path = os.path.join(log_dir, f"{test_name}.txt")  # Log file per test
 

From d3b05f66888058479d0c6bfa89b327d39a60d25c Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Sat, 22 Feb 2025 20:12:09 +0700
Subject: [PATCH 10/23] test: add get-default-engine and get-list-engine API tests

---
 .../api/engines/test_api_engine_list.py       | 25 ------
 .../engines/test_api_get_default_engine.py    | 85 +++++++++++++++++++
 .../api/engines/test_api_get_list_engine.py   | 73 ++++++++++++++++
 3 files changed, 158 insertions(+), 25 deletions(-)
 delete mode 100644 engine/e2e-test/api/engines/test_api_engine_list.py
 create mode 100644 engine/e2e-test/api/engines/test_api_get_default_engine.py
 create mode 100644 engine/e2e-test/api/engines/test_api_get_list_engine.py

diff --git a/engine/e2e-test/api/engines/test_api_engine_list.py b/engine/e2e-test/api/engines/test_api_engine_list.py
deleted file mode 100644
index 10346c988..000000000
--- a/engine/e2e-test/api/engines/test_api_engine_list.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import pytest
-import requests
-from utils.test_runner import start_server, stop_server
-
-
-class TestApiEngineList:
-
-    @pytest.fixture(autouse=True)
-    def setup_and_teardown(self):
-        # Setup
-        # Not sure why but on macOS amd, the first start server timeouts with CI
-        start_server()
-        stop_server()
-        success = start_server()
-        if not success:
-            raise Exception("Failed to start server")
-
-        yield
-
-        # Teardown
-        stop_server()
-
-    def test_engines_list_api_run_successfully(self):
-        response = requests.get("http://localhost:3928/engines")
-        assert response.status_code == 200
\ No newline at end of file
diff --git a/engine/e2e-test/api/engines/test_api_get_default_engine.py b/engine/e2e-test/api/engines/test_api_get_default_engine.py
new file mode 100644
index 000000000..600f2c38f
--- /dev/null
+++ b/engine/e2e-test/api/engines/test_api_get_default_engine.py
@@ -0,0 +1,85 @@
+import pytest
+import requests
+from utils.test_runner import start_server, stop_server
+import jsonschema
+from tenacity import retry, wait_exponential, stop_after_attempt
+from utils.logger import log_response
+from utils.assertion import assert_equal
+
+
+class TestApiDefaultEngine:
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
+
+    def test_api_get_default_engine_successfully(self):
+        # Data test
+        engine= "llama-cpp"
+        name= "linux-amd64-avx-cuda-11-7"
+        version= "v0.1.35-27.10.24"
+    
+        data = {"version": version, "variant": name}
+        post_install_url = f"http://localhost:3928/v1/engines/{engine}/install"
+        response = requests.post(
+            post_install_url, json=data
+        )
+        assert_equal(response.status_code,200)
+        
+        get_list_url = f"http://localhost:3928/v1/engines/{engine}"
+        get_default_url = f"http://localhost:3928/v1/engines/{engine}/default"
+
+        @retry(
+            wait=wait_exponential(multiplier=2, min=2, max=30), 
+            stop=stop_after_attempt(5) 
+        )
+        def get_request(url):
+            response = requests.get(url)
+            assert len(response.json()) > 0
+
+        get_request(get_list_url)
+        
+        response_default_engine = requests.get(get_default_url)
+        json_data = response_default_engine.json()
+
+        log_response(json_data, "test_api_get_default_engine_successfully")
+        assert_equal(response_default_engine.status_code, 200)
+
+        schema = {
+            "type": "object",
+            "properties": {
+                "engine": {"type": "string"},
+                "variant": {"type": "string"},
+                "version": {"type": "string"}
+            },
+            "required": ["engine", "variant", "version"]
+        }
+
+        # Validate response schema
+        jsonschema.validate(instance=json_data, schema=schema)
+        
+        assert_equal(json_data["engine"], engine)
+        assert_equal(json_data["version"], version)
+        assert_equal(json_data["variant"], name)
+        
+    def test_api_get_default_engine_successfully(self):
+        # Data test
+        engine= "invalid"
+    
+        get_default_url = f"http://localhost:3928/v1/engines/{engine}/default"
+
+        response_default_engine = requests.get(get_default_url)
+        json_data_get_default = response_default_engine.json()
+
+        log_response(json_data_get_default, "test_api_get_default_engine_successfully")
+        assert_equal(response_default_engine.status_code, 400)
+
+        assert_equal(json_data_get_default["message"], f"Engine {engine} is not supported yet!")
\ No newline at end of file
diff --git a/engine/e2e-test/api/engines/test_api_get_list_engine.py b/engine/e2e-test/api/engines/test_api_get_list_engine.py
new file mode 100644
index 000000000..9a1552de6
--- /dev/null
+++ b/engine/e2e-test/api/engines/test_api_get_list_engine.py
@@ -0,0 +1,73 @@
+import pytest
+import requests
+from utils.test_runner import start_server, stop_server
+import jsonschema
+from tenacity import retry, wait_exponential, stop_after_attempt
+from utils.logger import log_response
+from utils.assertion import assert_equal
+
+
+class TestApiEngineList:
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
+
+    def test_api_get_list_engines_successfully(self):
+        # Data test
+        engine= "llama-cpp"
+        name= "linux-amd64-avx-cuda-11-7"
+        version= "v0.1.35-27.10.24"
+    
+        data = {"version": version, "variant": name}
+        post_install_url = f"http://localhost:3928/v1/engines/{engine}/install"
+        response = requests.post(
+            post_install_url, json=data
+        )
+        assert_equal(response.status_code,200)
+        
+        get_list_url = f"http://localhost:3928/v1/engines/{engine}"
+
+        @retry(
+            wait=wait_exponential(multiplier=2, min=2, max=30), 
+            stop=stop_after_attempt(5) 
+        )
+        def get_request(url):
+            response = requests.get(url)
+            assert len(response.json()) > 0
+            return response
+
+        response_get_list = get_request(get_list_url)
+        json_data = response_get_list.json()
+
+        log_response(json_data, "test_api_get_list_engines_successfully")
+        assert_equal(response_get_list.status_code, 200)
+
+        schema = {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "engine": {"type": "string"},
+                    "name": {"type": "string"},
+                    "version": {"type": "string"}
+                },
+                "required": ["engine", "name", "version"]
+            }
+        }
+
+        # Validate response schema
+        jsonschema.validate(instance=json_data, schema=schema)
+        
+        assert_equal(len(json_data), 1)
+        assert_equal(json_data[0]["engine"], engine)
+        assert_equal(json_data[0]["version"], version)
+        assert_equal(json_data[0]["name"], name)
\ No newline at end of file

From 8a20ce739d042d0d849bf65c1ca89ad418e853fa Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Sat, 22 Feb 2025 20:29:33 +0700
Subject: [PATCH 11/23] test: use package-qualified imports in e2e runners

---
 .../runner/cortex-llamacpp-e2e-nightly.py     | 26 +++++++++----------
 engine/e2e-test/runner/main.py                | 26 +++++++++----------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py b/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
index 711f9e44c..f4aca303d 100644
--- a/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
+++ b/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
@@ -16,21 +16,21 @@
 sys.path.append(os.path.join(PROJECT_ROOT, "cli/common"))  
 
 ### e2e tests are expensive, have to keep engines tests in order
-from test_api_get_list_engine import TestApiEngineList
-from test_api_engine_install_nightly import TestApiEngineInstall
-from test_api_model import TestApiModel
-from test_api_model_import import TestApiModelImport
+from api.engines.test_api_get_list_engine import TestApiEngineList
+from api.engines.test_api_engine_install_nightly import TestApiEngineInstall
+from api.model.test_api_model import TestApiModel
+from api.model.test_api_model_import import TestApiModelImport
 
 ###
-from test_cli_engine_get import TestCliEngineGet
-from test_cli_engine_install_nightly import TestCliEngineInstall
-from test_cli_engine_list import TestCliEngineList
-from test_cli_engine_uninstall import TestCliEngineUninstall
-from test_cli_model import TestCliModel
-from test_cli_model_import import TestCliModelImport
-from test_cli_server_start import TestCliServerStart
-from test_cortex_update import TestCortexUpdate
-from test_create_log_folder import TestCreateLogFolder
+from cli.engines.test_cli_engine_get import TestCliEngineGet
+from cli.engines.test_cli_engine_install_nightly import TestCliEngineInstall
+from cli.engines.test_cli_engine_list import TestCliEngineList
+from cli.engines.test_cli_engine_uninstall import TestCliEngineUninstall
+from cli.model.test_cli_model import TestCliModel
+from cli.model.test_cli_model_import import TestCliModelImport
+from cli.common.test_cli_server_start import TestCliServerStart
+from cli.common.test_cortex_update import TestCortexUpdate
+from cli.common.test_create_log_folder import TestCreateLogFolder
 
 if __name__ == "__main__":
     sys.exit(pytest.main([__file__, "-v"]))
diff --git a/engine/e2e-test/runner/main.py b/engine/e2e-test/runner/main.py
index 2d7dcc1cb..a8c32bbac 100644
--- a/engine/e2e-test/runner/main.py
+++ b/engine/e2e-test/runner/main.py
@@ -16,21 +16,21 @@
 sys.path.append(os.path.join(PROJECT_ROOT, "cli/common"))  
 
 ### e2e tests are expensive, have to keep engines tests in order
-from test_api_get_list_engine import TestApiEngineList
-from test_api_engine import TestApiEngine
-from test_api_model import TestApiModel
-from test_api_model_import import TestApiModelImport
+from api.engines.test_api_get_list_engine import TestApiEngineList
+from api.engines.test_api_engine import TestApiEngine
+from api.model.test_api_model import TestApiModel
+from api.model.test_api_model_import import TestApiModelImport
 
 ###
-from test_cli_engine_get import TestCliEngineGet
-from test_cli_engine_install import TestCliEngineInstall
-from test_cli_engine_list import TestCliEngineList
-from test_cli_engine_uninstall import TestCliEngineUninstall
-from test_cli_model import TestCliModel
-from test_cli_model_import import TestCliModelImport
-from test_cli_server_start import TestCliServerStart
-from test_cortex_update import TestCortexUpdate
-from test_create_log_folder import TestCreateLogFolder
+from cli.engines.test_cli_engine_get import TestCliEngineGet
+from cli.engines.test_cli_engine_install import TestCliEngineInstall
+from cli.engines.test_cli_engine_list import TestCliEngineList
+from cli.engines.test_cli_engine_uninstall import TestCliEngineUninstall
+from cli.model.test_cli_model import TestCliModel
+from cli.model.test_cli_model_import import TestCliModelImport
+from cli.common.test_cli_server_start import TestCliServerStart
+from cli.common.test_cortex_update import TestCortexUpdate
+from cli.common.test_create_log_folder import TestCreateLogFolder
 
 if __name__ == "__main__":
     sys.exit(pytest.main([__file__, "-v"]))

From 43a1b2090575511ccb1301b1947a6175a97e0811 Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Sat, 22 Feb 2025 20:42:48 +0700
Subject: [PATCH 12/23] test: add tenacity to package

---
 engine/e2e-test/requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/engine/e2e-test/requirements.txt b/engine/e2e-test/requirements.txt
index d9c436a11..6991b03d3 100644
--- a/engine/e2e-test/requirements.txt
+++ b/engine/e2e-test/requirements.txt
@@ -3,4 +3,5 @@ pytest
 pytest-asyncio
 requests
 pyyaml
-jsonschema
\ No newline at end of file
+jsonschema
+tenacity
\ No newline at end of file

From 53e87d02a76a012c25cb9a8e8d5842e86ce6b080 Mon Sep 17 00:00:00 2001
From: Harry Le <harry@menlo.ai>
Date: Sun, 23 Feb 2025 08:17:28 +0700
Subject: [PATCH 13/23] test: add engine release and latest-release API tests

---
 .../engines/test_api_get_default_engine.py    |  9 +--
 .../engines/test_api_get_engine_release.py    | 76 +++++++++++++++++++
 .../test_api_get_engine_release_latest.py     | 73 ++++++++++++++++++
 .../runner/cortex-llamacpp-e2e-nightly.py     |  3 +
 engine/e2e-test/runner/main.py                |  3 +
 engine/e2e-test/utils/assertion.py            |  6 +-
 6 files changed, 163 insertions(+), 7 deletions(-)
 create mode 100644 engine/e2e-test/api/engines/test_api_get_engine_release.py
 create mode 100644 engine/e2e-test/api/engines/test_api_get_engine_release_latest.py

diff --git a/engine/e2e-test/api/engines/test_api_get_default_engine.py b/engine/e2e-test/api/engines/test_api_get_default_engine.py
index 600f2c38f..0320c1cc1 100644
--- a/engine/e2e-test/api/engines/test_api_get_default_engine.py
+++ b/engine/e2e-test/api/engines/test_api_get_default_engine.py
@@ -33,6 +33,7 @@ def test_api_get_default_engine_successfully(self):
             post_install_url, json=data
         )
         assert_equal(response.status_code,200)
+        log_response(response.json(), "test_api_get_default_engine_successfully")
         
         get_list_url = f"http://localhost:3928/v1/engines/{engine}"
         get_default_url = f"http://localhost:3928/v1/engines/{engine}/default"
@@ -66,11 +67,7 @@ def get_request(url):
         # Validate response schema
         jsonschema.validate(instance=json_data, schema=schema)
         
-        assert_equal(json_data["engine"], engine)
-        assert_equal(json_data["version"], version)
-        assert_equal(json_data["variant"], name)
-        
-    def test_api_get_default_engine_successfully(self):
+    def test_api_get_default_engine_failed_invalid_engine(self):
         # Data test
         engine= "invalid"
     
@@ -79,7 +76,7 @@ def test_api_get_default_engine_successfully(self):
         response_default_engine = requests.get(get_default_url)
         json_data_get_default = response_default_engine.json()
 
-        log_response(json_data_get_default, "test_api_get_default_engine_successfully")
+        log_response(json_data_get_default, "test_api_get_default_engine_failed_invalid_engine")
         assert_equal(response_default_engine.status_code, 400)
 
         assert_equal(json_data_get_default["message"], f"Engine {engine} is not supported yet!")
\ No newline at end of file
diff --git a/engine/e2e-test/api/engines/test_api_get_engine_release.py b/engine/e2e-test/api/engines/test_api_get_engine_release.py
new file mode 100644
index 000000000..1e68de121
--- /dev/null
+++ b/engine/e2e-test/api/engines/test_api_get_engine_release.py
@@ -0,0 +1,76 @@
+import pytest
+import requests
+from utils.test_runner import start_server, stop_server
+import jsonschema
+from tenacity import retry, wait_exponential, stop_after_attempt
+from utils.logger import log_response
+from utils.assertion import assert_equal, assert_contains
+
+
+class TestApiEngineRelease:
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
+
+    def test_api_get_engine_release_successfully(self):
+        # Data test
+        engine= "llama-cpp"
+        get_release_url = f"http://localhost:3928/v1/engines/{engine}/releases"
+
+        @retry(
+            wait=wait_exponential(multiplier=2, min=2, max=30), 
+            stop=stop_after_attempt(5) 
+        )
+        def get_request(url):
+            response = requests.get(url)
+            assert len(response.json()) > 0
+
+        get_request(get_release_url)
+        
+        response_engine_release = requests.get(get_release_url)
+        json_data = response_engine_release.json()
+
+        log_response(json_data, "test_api_get_engine_release_successfully")
+        assert_equal(response_engine_release.status_code, 200)
+
+        schema = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                "draft": { "type": "boolean" },
+                "name": { "type": "string" },
+                "prerelease": { "type": "boolean" },
+                "published_at": { "type": "string", "format": "date-time" },
+                "url": { "type": "string", "format": "uri" }
+                },
+                "required": ["draft", "name", "prerelease", "published_at", "url"]
+            }
+        }
+
+        # Validate response schema
+        jsonschema.validate(instance=json_data, schema=schema)
+        
+    def test_api_ge_engine_release_failed_invalid_engine(self):
+        # Data test
+        engine= "invalid"
+    
+        get_default_url = f"http://localhost:3928/v1/engines/{engine}/releases"
+
+        response_default_engine = requests.get(get_default_url)
+        json_data_get_default = response_default_engine.json()
+
+        log_response(json_data_get_default, "test_api_ge_engine_release_failed_invalid_engine")
+        assert_equal(response_default_engine.status_code, 400)
+
+        assert_contains(json_data_get_default["message"], "Not Found")
\ No newline at end of file
diff --git a/engine/e2e-test/api/engines/test_api_get_engine_release_latest.py b/engine/e2e-test/api/engines/test_api_get_engine_release_latest.py
new file mode 100644
index 000000000..be65141ea
--- /dev/null
+++ b/engine/e2e-test/api/engines/test_api_get_engine_release_latest.py
@@ -0,0 +1,73 @@
+import pytest
+import requests
+from utils.test_runner import start_server, stop_server
+import jsonschema
+from tenacity import retry, wait_exponential, stop_after_attempt
+from utils.logger import log_response
+from utils.assertion import assert_equal, assert_contains
+
+
+class TestApiEngineReleaseLatest:
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
+
+    def test_api_get_engine_release_latest_successfully(self):
+        # Data test
+        engine= "llama-cpp"
+        get_release_url = f"http://localhost:3928/v1/engines/{engine}/releases/latest"
+
+        @retry(
+            wait=wait_exponential(multiplier=2, min=2, max=30), 
+            stop=stop_after_attempt(5) 
+        )
+        def get_request(url):
+            response = requests.get(url)
+            assert len(response.json()) > 0
+
+        get_request(get_release_url)
+        
+        response_engine_release = requests.get(get_release_url)
+        json_data = response_engine_release.json()
+
+        log_response(json_data, "test_api_get_engine_release_latest_successfully")
+        assert_equal(response_engine_release.status_code, 200)
+
+        schema = {
+            "$schema": "https://json-schema.org/draft/2020-12/schema",
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                "created_at": {
+                    "type": "string",
+                    "format": "date-time"
+                },
+                "download_count": {
+                    "type": "integer",
+                    "minimum": 0
+                },
+                "name": {
+                    "type": "string"
+                },
+                "size": {
+                    "type": "integer",
+                    "minimum": 0
+                }
+                },
+                "required": ["created_at", "download_count", "name", "size"]
+            }
+        }
+
+
+        # Validate response schema
+        jsonschema.validate(instance=json_data, schema=schema)
\ No newline at end of file
diff --git a/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py b/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
index f4aca303d..61b153267 100644
--- a/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
+++ b/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
@@ -18,6 +18,9 @@
 ### e2e tests are expensive, have to keep engines tests in order
 from api.engines.test_api_get_list_engine import TestApiEngineList
 from api.engines.test_api_engine_install_nightly import TestApiEngineInstall
+from api.engines.test_api_get_default_engine import TestApiDefaultEngine
+from api.engines.test_api_get_engine_release import TestApiEngineRelease
+from api.engines.test_api_get_engine_release_latest import TestApiEngineReleaseLatest
 from api.model.test_api_model import TestApiModel
 from api.model.test_api_model_import import TestApiModelImport
 
diff --git a/engine/e2e-test/runner/main.py b/engine/e2e-test/runner/main.py
index a8c32bbac..9b2a0316c 100644
--- a/engine/e2e-test/runner/main.py
+++ b/engine/e2e-test/runner/main.py
@@ -18,6 +18,9 @@
 ### e2e tests are expensive, have to keep engines tests in order
 from api.engines.test_api_get_list_engine import TestApiEngineList
 from api.engines.test_api_engine import TestApiEngine
+from api.engines.test_api_get_default_engine import TestApiDefaultEngine
+from api.engines.test_api_get_engine_release import TestApiEngineRelease
+from api.engines.test_api_get_engine_release_latest import TestApiEngineReleaseLatest
 from api.model.test_api_model import TestApiModel
 from api.model.test_api_model_import import TestApiModelImport
 
diff --git a/engine/e2e-test/utils/assertion.py b/engine/e2e-test/utils/assertion.py
index 067335732..63443981f 100644
--- a/engine/e2e-test/utils/assertion.py
+++ b/engine/e2e-test/utils/assertion.py
@@ -1,3 +1,7 @@
 def assert_equal(actual, expected):
     """Custom assertion to compare actual and expected values."""
-    assert actual == expected, f"Assertion failed: Expected {expected}, but got {actual}"
+    assert actual == expected, f"Assertion failed: Expected '{expected}', but got '{actual}'"
+    
+def assert_contains(main_string, sub_string):
+    """Custom assertion to compare actual and expected values."""
+    assert sub_string in main_string, f"Assertion failed: Expected '{main_string}' has '{sub_string}'"

From 835e68f1e4b76780901b9f851f6810f040e2329d Mon Sep 17 00:00:00 2001
From: Thien Tran <gau.nernst@yahoo.com.sg>
Date: Mon, 24 Feb 2025 10:10:30 +0800
Subject: [PATCH 14/23] chore: consolidate subprocess utils (#2011)

---
 .../extensions/python-engine/python_engine.cc | 27 ++------
 .../extensions/python-engine/python_engine.h  |  1 -
 engine/utils/process/utils.cc                 | 68 ++++++++++++++++++-
 engine/utils/process/utils.h                  |  4 +-
 engine/utils/process_status_utils.h           | 56 ---------------
 5 files changed, 73 insertions(+), 83 deletions(-)
 delete mode 100644 engine/utils/process_status_utils.h

diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc
index d34f75c08..685301b47 100644
--- a/engine/extensions/python-engine/python_engine.cc
+++ b/engine/extensions/python-engine/python_engine.cc
@@ -89,30 +89,11 @@ bool PythonEngine::TerminateModelProcess(const std::string& model) {
     return false;
   }
 
-#if defined(_WIN32)
-  HANDLE hProcess = OpenProcess(PROCESS_TERMINATE, FALSE, it->second);
-  if (hProcess == NULL) {
-    LOG_ERROR << "Failed to open process";
-    return false;
-  }
-
-  bool terminated = TerminateProcess(hProcess, 0) == TRUE;
-  CloseHandle(hProcess);
-
-  if (terminated) {
+  bool success = cortex::process::KillProcess(it->second);
+  if (success) {
     process_map_.erase(it);
-    return true;
   }
-
-#elif defined(__APPLE__) || defined(__linux__)
-  int result = kill(it->second, SIGTERM);
-  if (result == 0) {
-    process_map_.erase(it);
-    return true;
-  }
-#endif
-
-  return false;
+  return success;
 }
 
 CurlResponse PythonEngine::MakeGetRequest(const std::string& model,
@@ -823,7 +804,7 @@ void PythonEngine::GetModelStatus(
   auto model_config = models_[model];
   auto health_endpoint = model_config.heath_check;
   auto pid = process_map_[model];
-  auto is_process_live = process_status_utils::IsProcessRunning(pid);
+  auto is_process_live = cortex::process::IsProcessAlive(pid);
   auto response_health = MakeGetRequest(model, health_endpoint.path);
 
   if (response_health.error && is_process_live) {
diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h
index 70a9b9829..842ce8259 100644
--- a/engine/extensions/python-engine/python_engine.h
+++ b/engine/extensions/python-engine/python_engine.h
@@ -14,7 +14,6 @@
 #include "extensions/template_renderer.h"
 #include "utils/file_logger.h"
 #include "utils/file_manager_utils.h"
-#include "utils/process_status_utils.h"
 #include "utils/curl_utils.h"
 #include "utils/process/utils.h"
 
diff --git a/engine/utils/process/utils.cc b/engine/utils/process/utils.cc
index fef425803..f81796c5a 100644
--- a/engine/utils/process/utils.cc
+++ b/engine/utils/process/utils.cc
@@ -1,7 +1,9 @@
 #include "utils/process/utils.h"
 #include "utils/logging_utils.h"
 
-#if defined(__APPLE__) || defined(__linux__)
+#if defined(_WIN32)
+#include <tlhelp32.h>
+#elif defined(__APPLE__) || defined(__linux__)
 extern char **environ;  // environment variables
 #endif
 
@@ -103,4 +105,66 @@ pid_t SpawnProcess(const std::vector<std::string>& command) {
   }
 }
 
-}  // namespace cortex::process
\ No newline at end of file
+bool IsProcessAlive(pid_t pid) {  // true when `pid` appears to name an existing process
+#ifdef _WIN32
+  // Windows: scan a Toolhelp snapshot of all processes for an entry with a matching PID.
+  HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
+  if (snapshot == INVALID_HANDLE_VALUE) {
+    return false;
+  }
+
+  PROCESSENTRY32 processEntry = {0};
+  processEntry.dwSize = sizeof(processEntry);
+
+  if (Process32First(snapshot, &processEntry)) {
+    do {
+      if (processEntry.th32ProcessID == pid) {
+        CloseHandle(snapshot);
+        return true;
+      }
+    } while (Process32Next(snapshot, &processEntry));
+  }
+
+  CloseHandle(snapshot);
+  return false;
+
+#elif defined(__APPLE__) || defined(__linux__)
+  // Unix-like systems (Linux and macOS): non-positive PIDs are rejected before probing.
+  if (pid <= 0) {
+    return false;
+  }
+
+  // Try to send signal 0 to the process
+  // This doesn't actually send a signal but checks if we can send signals to the process
+  int result = kill(pid, 0);
+
+  if (result == 0) {
+    return true;  // Process exists and we have permission to send it signals
+  }
+
+  return errno != ESRCH;  // ESRCH means "no such process"; any other error (e.g. EPERM) is treated as alive
+#else
+#error "Unsupported platform"
+#endif
+}
+
+bool KillProcess(pid_t pid) {  // asks the OS to terminate `pid`; true when the terminate call itself succeeded
+#if defined(_WIN32)
+  HANDLE hProcess = OpenProcess(PROCESS_TERMINATE, FALSE, pid);
+  if (hProcess == NULL) {
+    LOG_ERROR << "Failed to open process";
+    return false;
+  }
+
+  bool is_success = TerminateProcess(hProcess, 0) == TRUE;  // 0 is the exit code assigned to the target
+  CloseHandle(hProcess);
+  return is_success;
+#elif defined(__APPLE__) || defined(__linux__)
+  // NOTE: SIGTERM is only a request the target may handle; should we use SIGKILL here to be consistent with Windows?
+  return kill(pid, SIGTERM) == 0;
+#else
+#error "Unsupported platform"
+#endif
+}
+
+}  // namespace cortex::process
diff --git a/engine/utils/process/utils.h b/engine/utils/process/utils.h
index 9332607e9..2a5c62dfa 100644
--- a/engine/utils/process/utils.h
+++ b/engine/utils/process/utils.h
@@ -21,5 +21,7 @@ std::string ConstructWindowsCommandLine(const std::vector<std::string>& args);
 std::vector<char*> ConvertToArgv(const std::vector<std::string>& args);
 
 pid_t SpawnProcess(const std::vector<std::string>& command);
+bool IsProcessAlive(pid_t pid);
+bool KillProcess(pid_t pid);
 
-}
\ No newline at end of file
+}
diff --git a/engine/utils/process_status_utils.h b/engine/utils/process_status_utils.h
deleted file mode 100644
index 189f82ede..000000000
--- a/engine/utils/process_status_utils.h
+++ /dev/null
@@ -1,56 +0,0 @@
-#include <iostream>
-
-#ifdef _WIN32
-#include <tlhelp32.h>
-#include <windows.h>
-#include <process.h>
-using pid_t = DWORD;
-#elif defined(__APPLE__) || defined(__linux__)
-#include <errno.h>
-#include <signal.h>
-#endif
-namespace process_status_utils {
-
-inline bool IsProcessRunning(pid_t pid) {
-#ifdef _WIN32
-  // Windows implementation
-  HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
-  if (snapshot == INVALID_HANDLE_VALUE) {
-    return false;
-  }
-
-  PROCESSENTRY32 processEntry = {0};
-  processEntry.dwSize = sizeof(processEntry);
-
-  if (Process32First(snapshot, &processEntry)) {
-    do {
-      if (processEntry.th32ProcessID == pid) {
-        CloseHandle(snapshot);
-        return true;
-      }
-    } while (Process32Next(snapshot, &processEntry));
-  }
-
-  CloseHandle(snapshot);
-  return false;
-
-#elif defined(__APPLE__) || defined(__linux__)
-  // Unix-like systems (Linux and macOS) implementation
-  if (pid <= 0) {
-    return false;
-  }
-
-  // Try to send signal 0 to the process
-  // This doesn't actually send a signal but checks if we can send signals to the process
-  int result = kill(pid, 0);
-
-  if (result == 0) {
-    return true;  // Process exists and we have permission to send it signals
-  }
-
-  return errno != ESRCH;  // ESRCH means "no such process"
-#else
-#error "Unsupported platform"
-#endif
-}
-}  // namespace process_status_utils
\ No newline at end of file

From cb2feb96b375bb5f6bc173baa78022d81ef187b0 Mon Sep 17 00:00:00 2001
From: Akarshan Biswas <akarshan@menlo.ai>
Date: Mon, 24 Feb 2025 13:02:26 +0530
Subject: [PATCH 15/23] Fix: Include algorithm header file to fix build on
 distro other than Ubuntu

---
 engine/common/download_task_queue.h            | 1 +
 engine/repositories/assistant_fs_repository.cc | 1 +
 engine/repositories/file_fs_repository.cc      | 1 +
 engine/repositories/thread_fs_repository.cc    | 1 +
 4 files changed, 4 insertions(+)

diff --git a/engine/common/download_task_queue.h b/engine/common/download_task_queue.h
index 5991687b7..345f81512 100644
--- a/engine/common/download_task_queue.h
+++ b/engine/common/download_task_queue.h
@@ -6,6 +6,7 @@
 #include <string>
 #include <unordered_map>
 #include "common/download_task.h"
+#include <algorithm>
 
 class DownloadTaskQueue {
  private:
diff --git a/engine/repositories/assistant_fs_repository.cc b/engine/repositories/assistant_fs_repository.cc
index 87b4174fd..30bed1663 100644
--- a/engine/repositories/assistant_fs_repository.cc
+++ b/engine/repositories/assistant_fs_repository.cc
@@ -4,6 +4,7 @@
 #include <fstream>
 #include <mutex>
 #include "utils/result.hpp"
+#include <algorithm>
 
 cpp::result<std::vector<OpenAi::Assistant>, std::string>
 AssistantFsRepository::ListAssistants(uint8_t limit, const std::string& order,
diff --git a/engine/repositories/file_fs_repository.cc b/engine/repositories/file_fs_repository.cc
index 543b86926..3ddb90977 100644
--- a/engine/repositories/file_fs_repository.cc
+++ b/engine/repositories/file_fs_repository.cc
@@ -5,6 +5,7 @@
 #include "database/file.h"
 #include "utils/logging_utils.h"
 #include "utils/result.hpp"
+#include <algorithm>
 
 std::filesystem::path FileFsRepository::GetFilePath() const {
   return data_folder_path_ / kFileContainerFolderName;
diff --git a/engine/repositories/thread_fs_repository.cc b/engine/repositories/thread_fs_repository.cc
index 6b75db8e4..36650be76 100644
--- a/engine/repositories/thread_fs_repository.cc
+++ b/engine/repositories/thread_fs_repository.cc
@@ -3,6 +3,7 @@
 #include <mutex>
 #include "common/assistant.h"
 #include "utils/result.hpp"
+#include <algorithm>
 
 cpp::result<std::vector<OpenAi::Thread>, std::string>
 ThreadFsRepository::ListThreads(uint8_t limit, const std::string& order,

From b4164c63b84b74e4796877cb5ec065f8eec1c877 Mon Sep 17 00:00:00 2001
From: vansangpfiev <vansangpfiev@gmail.com>
Date: Mon, 24 Feb 2025 14:33:11 +0700
Subject: [PATCH 16/23] fix: append stop words from request in case of using
 template renderer for local engine (#2017)

* fix: append stop words from request in case of using template renderer for local engine

* chore: updated minja to fix crlf on Windows

---------

Co-authored-by: sangjanai <sang@jan.ai>
---
 engine/services/inference_service.cc |  18 +-
 engine/utils/minja.hpp               | 412 ++++++++++++++++++++-------
 2 files changed, 320 insertions(+), 110 deletions(-)

diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc
index 713e1e1ee..0a52665ad 100644
--- a/engine/services/inference_service.cc
+++ b/engine/services/inference_service.cc
@@ -66,9 +66,21 @@ cpp::result<void, InferResult> InferenceService::HandleChatCompletion(
             tokenizer->add_eos_token, tokenizer->add_generation_prompt);
         if (prompt_result.has_value()) {
           (*json_body)["prompt"] = prompt_result.value();
-          Json::Value stops(Json::arrayValue);
-          stops.append(tokenizer->eos_token);
-          (*json_body)["stop"] = stops;
+          // Merge the request's own stop words with the EOS token. `stop` may
+          // be absent/null, a single string, or an array; append() needs an array.
+          Json::Value stops(Json::arrayValue);
+          const Json::Value& requested = (*json_body)["stop"];
+          if (requested.isString()) {
+            stops.append(requested);
+          } else if (requested.isArray()) {
+            stops = requested;
+          }
+          bool has_eos = false;
+          for (const auto& s : stops) {
+            has_eos = has_eos || (s.isString() && s.asString() == tokenizer->eos_token);
+          }
+          if (!has_eos) stops.append(tokenizer->eos_token);
+          (*json_body)["stop"] = stops;
         } else {
           CTL_ERR("Failed to render prompt: " + prompt_result.error());
         }
diff --git a/engine/utils/minja.hpp b/engine/utils/minja.hpp
index 76f2110f2..47f9694ca 100644
--- a/engine/utils/minja.hpp
+++ b/engine/utils/minja.hpp
@@ -10,7 +10,6 @@
 
 #include <iostream>
 #include <memory>
-#include <nlohmann/json.hpp>
 #include <regex>
 #include <sstream>
 #include <stdexcept>
@@ -18,6 +17,8 @@
 #include <unordered_set>
 #include <vector>
 
+#include <nlohmann/json.hpp>
+
 using json = nlohmann::ordered_json;
 
 namespace minja {
@@ -32,6 +33,15 @@ struct Options {
 
 struct ArgumentsValue;
 
+inline std::string normalize_newlines(const std::string& s) {  // CRLF -> LF on Windows builds; returns `s` unchanged elsewhere
+#ifdef _WIN32
+  static const std::regex nl_regex("\r\n");  // function-local static: constructed once, reused by later calls
+  return std::regex_replace(s, nl_regex, "\n");
+#else
+  return s;
+#endif
+}
+
 /* Values that behave roughly like in Python. */
 class Value : public std::enable_shared_from_this<Value> {
  public:
@@ -210,6 +220,39 @@ class Value : public std::enable_shared_from_this<Value> {
       throw std::runtime_error("Value is not an array: " + dump());
     array_->push_back(v);
   }
+  // Removes and returns one element (Python-style list.pop / dict.pop). Arrays: a null `index`
+  // pops the last item, negative indices count from the end. Objects: `index` is the key.
+  Value pop(const Value& index) {
+    if (is_array()) {
+      if (array_->empty()) throw std::runtime_error("pop from empty list");
+      if (index.is_null()) {
+        auto ret = array_->back();
+        array_->pop_back();
+        return ret;
+      }
+      if (!index.is_number_integer())
+        throw std::runtime_error("pop index must be an integer: " + index.dump());
+      const auto n = static_cast<int>(array_->size());
+      auto i = index.get<int>();
+      if (i < -n || i >= n)  // valid range is [-n, n); negative indices used to be rejected outright
+        throw std::runtime_error("pop index out of range: " + index.dump());
+      auto it = array_->begin() + (i < 0 ? i + n : i);
+      auto ret = *it;
+      array_->erase(it);
+      return ret;
+    } else if (is_object()) {
+      if (!index.is_hashable())
+        throw std::runtime_error("Unashable type: " + index.dump());
+      auto it = object_->find(index.primitive_);
+      if (it == object_->end())
+        throw std::runtime_error("Key not found: " + index.dump());
+      auto ret = it->second;
+      object_->erase(it);
+      return ret;
+    } else {
+      throw std::runtime_error("Value is not an array or object: " + dump());
+    }
+  }
   Value get(const Value& key) {
     if (array_) {
       if (!key.is_number_integer()) {
@@ -409,12 +452,12 @@ class Value : public std::enable_shared_from_this<Value> {
     }
   }
   void erase(size_t index) {
-    if (array_)
+    if (!array_)
       throw std::runtime_error("Value is not an array: " + dump());
     array_->erase(array_->begin() + index);
   }
   void erase(const std::string& key) {
-    if (object_)
+    if (!object_)
       throw std::runtime_error("Value is not an object: " + dump());
     object_->erase(key);
   }
@@ -635,7 +678,7 @@ static std::string error_location_suffix(const std::string& source,
   if (line > 1)
     out << get_line(line - 1) << "\n";
   out << get_line(line) << "\n";
-  out << std::string(col - 1, ' ') << "^" << "\n";
+  out << std::string(col - 1, ' ') << "^\n";
   if (line < max_line)
     out << get_line(line + 1) << "\n";
 
@@ -682,7 +725,9 @@ class Context : public std::enable_shared_from_this<Context> {
       return parent_->contains(key);
     return false;
   }
-  virtual void set(const Value& key, Value& value) { values_.set(key, value); }
+  virtual void set(const Value& key, const Value& value) {
+    values_.set(key, value);
+  }
 };
 
 struct Location {
@@ -762,13 +807,17 @@ class TemplateToken {
     EndIf,
     For,
     EndFor,
+    Generation,
+    EndGeneration,
     Set,
     EndSet,
     Comment,
     Macro,
     EndMacro,
     Filter,
-    EndFilter
+    EndFilter,
+    Break,
+    Continue
   };
 
   static std::string typeToString(Type t) {
@@ -803,6 +852,14 @@ class TemplateToken {
         return "filter";
       case Type::EndFilter:
         return "endfilter";
+      case Type::Generation:
+        return "generation";
+      case Type::EndGeneration:
+        return "endgeneration";
+      case Type::Break:
+        return "break";
+      case Type::Continue:
+        return "continue";
     }
     return "Unknown";
   }
@@ -913,6 +970,18 @@ struct EndForTemplateToken : public TemplateToken {
       : TemplateToken(Type::EndFor, location, pre, post) {}
 };
 
+struct GenerationTemplateToken : public TemplateToken {  // token for an opening `{% generation %}` tag
+  GenerationTemplateToken(const Location& location, SpaceHandling pre,
+                          SpaceHandling post)
+      : TemplateToken(Type::Generation, location, pre, post) {}
+};
+
+struct EndGenerationTemplateToken : public TemplateToken {  // token for a closing `{% endgeneration %}` tag
+  EndGenerationTemplateToken(const Location& location, SpaceHandling pre,
+                             SpaceHandling post)
+      : TemplateToken(Type::EndGeneration, location, pre, post) {}
+};
+
 struct SetTemplateToken : public TemplateToken {
   std::string ns;
   std::vector<std::string> var_names;
@@ -940,6 +1009,28 @@ struct CommentTemplateToken : public TemplateToken {
       : TemplateToken(Type::Comment, location, pre, post), text(t) {}
 };
 
+enum class LoopControlType { Break, Continue };  // which loop-control tag (`break` / `continue`) was encountered
+
+class LoopControlException : public std::runtime_error {  // thrown by LoopControlNode, caught by ForNode's loop
+ public:
+  LoopControlType control_type;  // tells the catching loop whether to break or continue
+  LoopControlException(const std::string& message, LoopControlType control_type)
+      : std::runtime_error(message), control_type(control_type) {}
+  LoopControlException(LoopControlType control_type)  // default text is what surfaces if no loop catches it
+      : std::runtime_error(
+            (control_type == LoopControlType::Continue ? "continue" : "break") +
+            std::string(" outside of a loop")),
+        control_type(control_type) {}
+};
+
+struct LoopControlTemplateToken : public TemplateToken {  // token for a `{% break %}` or `{% continue %}` tag
+  LoopControlType control_type;  // also selects the base `type`, so `continue` is no longer reported as Type::Break
+  LoopControlTemplateToken(const Location& location, SpaceHandling pre,
+                           SpaceHandling post, LoopControlType control_type)
+      : TemplateToken(control_type == LoopControlType::Break ? Type::Break : Type::Continue, location, pre, post),
+        control_type(control_type) {}
+};
+
 class TemplateNode {
   Location location_;
 
@@ -953,6 +1044,13 @@ class TemplateNode {
               const std::shared_ptr<Context>& context) const {
     try {
       do_render(out, context);
+    } catch (const LoopControlException& e) {
+      // TODO: make stack creation lazy. Only needed if it was thrown outside of a loop.
+      std::ostringstream err;
+      err << e.what();
+      if (location_.source)
+        err << error_location_suffix(*location_.source, location_.pos);
+      throw LoopControlException(err.str(), e.control_type);
     } catch (const std::exception& e) {
       std::ostringstream err;
       err << e.what();
@@ -1044,6 +1142,18 @@ class IfNode : public TemplateNode {
   }
 };
 
+class LoopControlNode : public TemplateNode {  // template node produced for `{% break %}` / `{% continue %}`
+  LoopControlType control_type_;
+
+ public:
+  LoopControlNode(const Location& location, LoopControlType control_type)
+      : TemplateNode(location), control_type_(control_type) {}
+  void do_render(std::ostringstream&,
+                 const std::shared_ptr<Context>&) const override {
+    throw LoopControlException(control_type_);  // writes no output; the exception carries the break/continue request
+  }
+};
+
 class ForNode : public TemplateNode {
   std::vector<std::string> var_names;
   std::shared_ptr<Expression> iterable;
@@ -1126,7 +1236,14 @@ class ForNode : public TemplateNode {
           loop.set("last", i == (n - 1));
           loop.set("previtem", i > 0 ? filtered_items.at(i - 1) : Value());
           loop.set("nextitem", i < n - 1 ? filtered_items.at(i + 1) : Value());
-          body->render(out, loop_context);
+          try {
+            body->render(out, loop_context);
+          } catch (const LoopControlException& e) {
+            if (e.control_type == LoopControlType::Break)
+              break;
+            if (e.control_type == LoopControlType::Continue)
+              continue;
+          }
         }
       }
     };
@@ -1543,8 +1660,8 @@ class BinaryOpExpr : public Expression {
         return right->evaluate(context).to_bool();
       } else if (op == Op::Or) {
         if (l.to_bool())
-          return Value(true);
-        return right->evaluate(context).to_bool();
+          return l;
+        return right->evaluate(context);
       }
 
       auto r = right->evaluate(context);
@@ -1638,8 +1755,19 @@ struct ArgumentsExpression {
 };
 
 static std::string strip(const std::string& s) {
-  static std::regex trailing_spaces_regex("^\\s+|\\s+$");
-  return std::regex_replace(s, trailing_spaces_regex, "");
+  auto start = s.find_first_not_of(" \t\n\r");
+  if (start == std::string::npos)
+    return "";
+  auto end = s.find_last_not_of(" \t\n\r");
+  return s.substr(start, end - start + 1);
+}
+
+// Upper-cases the first byte of `s`, rest unchanged (cast via unsigned char: toupper is UB on negative values).
+static std::string capitalize(const std::string& s) {
+  if (s.empty()) return s;
+  auto result = s;
+  result[0] = static_cast<char>(std::toupper(static_cast<unsigned char>(result[0])));
+  return result;
 }
 
 static std::string html_escape(const std::string& s) {
@@ -1657,7 +1785,7 @@ static std::string html_escape(const std::string& s) {
         result += "&gt;";
         break;
       case '"':
-        result += "&quot;";
+        result += "&#34;";
         break;
       case '\'':
         result += "&apos;";
@@ -1698,6 +1826,9 @@ class MethodCallExpr : public Expression {
         vargs.expectArgs("append method", {1, 1}, {0, 0});
         obj.push_back(vargs.args[0]);
         return Value();
+      } else if (method->get_name() == "pop") {
+        vargs.expectArgs("pop method", {0, 1}, {0, 0});
+        return obj.pop(vargs.args.empty() ? Value() : vargs.args[0]);
       } else if (method->get_name() == "insert") {
         vargs.expectArgs("insert method", {2, 2}, {0, 0});
         auto index = vargs.args[0].get<int64_t>();
@@ -1714,6 +1845,9 @@ class MethodCallExpr : public Expression {
           result.push_back(Value::array({key, obj.at(key)}));
         }
         return result;
+      } else if (method->get_name() == "pop") {
+        vargs.expectArgs("pop method", {1, 1}, {0, 0});
+        return obj.pop(vargs.args[0]);
       } else if (method->get_name() == "get") {
         vargs.expectArgs("get method", {1, 2}, {0, 0});
         auto key = vargs.args[0];
@@ -1735,6 +1869,9 @@ class MethodCallExpr : public Expression {
       if (method->get_name() == "strip") {
         vargs.expectArgs("strip method", {0, 0}, {0, 0});
         return Value(strip(str));
+      } else if (method->get_name() == "capitalize") {
+        vargs.expectArgs("capitalize method", {0, 0}, {0, 0});
+        return Value(capitalize(str));
       } else if (method->get_name() == "endswith") {
         vargs.expectArgs("endswith method", {1, 1}, {0, 0});
         auto suffix = vargs.args[0].get<std::string>();
@@ -2129,8 +2266,7 @@ class Parser {
       throw std::runtime_error(
           "Expected left side of 'logical compare' expression");
 
-    static std::regex compare_tok(
-        R"(==|!=|<=?|>=?|in\b|is\b|not[\r\n\s]+in\b)");
+    static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not\s+in\b)");
     static std::regex not_tok(R"(not\b)");
     std::string op_str;
     while (!(op_str = consumeToken(compare_tok)).empty()) {
@@ -2593,8 +2729,7 @@ class Parser {
   using TemplateTokenIterator = TemplateTokenVector::const_iterator;
 
   std::vector<std::string> parseVarNames() {
-    static std::regex varnames_regex(
-        R"(((?:\w+)(?:[\r\n\s]*,[\r\n\s]*(?:\w+))*)[\r\n\s]*)");
+    static std::regex varnames_regex(R"(((?:\w+)(?:\s*,\s*(?:\w+))*)\s*)");
 
     std::vector<std::string> group;
     if ((group = consumeTokenGroups(varnames_regex)).empty())
@@ -2620,18 +2755,19 @@ class Parser {
   }
 
   TemplateTokenVector tokenize() {
-    static std::regex comment_tok(R"(\{#([-~]?)(.*?)([-~]?)#\})");
+    static std::regex comment_tok(R"(\{#([-~]?)([\s\S]*?)([-~]?)#\})");
     static std::regex expr_open_regex(R"(\{\{([-~])?)");
-    static std::regex block_open_regex(R"(^\{%([-~])?[\s\n\r]*)");
+    static std::regex block_open_regex(R"(^\{%([-~])?\s*)");
     static std::regex block_keyword_tok(
-        R"((if|else|elif|endif|for|endfor|set|endset|block|endblock|macro|endmacro|filter|endfilter)\b)");
-    static std::regex text_regex(R"([\s\S\n\r]*?($|(?=\{\{|\{%|\{#)))");
-    static std::regex expr_close_regex(R"([\s\n\r]*([-~])?\}\})");
-    static std::regex block_close_regex(R"([\s\n\r]*([-~])?%\})");
+        R"((if|else|elif|endif|for|endfor|generation|endgeneration|set|endset|block|endblock|macro|endmacro|filter|endfilter|break|continue)\b)");
+    static std::regex non_text_open_regex(R"(\{\{|\{%|\{#)");
+    static std::regex expr_close_regex(R"(\s*([-~])?\}\})");
+    static std::regex block_close_regex(R"(\s*([-~])?%\})");
 
     TemplateTokenVector tokens;
     std::vector<std::string> group;
     std::string text;
+    std::smatch match;
 
     try {
       while (it != end) {
@@ -2723,9 +2859,16 @@ class Parser {
             auto post_space = parseBlockClose();
             tokens.push_back(std::make_unique<EndForTemplateToken>(
                 location, pre_space, post_space));
+          } else if (keyword == "generation") {
+            auto post_space = parseBlockClose();
+            tokens.push_back(std::make_unique<GenerationTemplateToken>(
+                location, pre_space, post_space));
+          } else if (keyword == "endgeneration") {
+            auto post_space = parseBlockClose();
+            tokens.push_back(std::make_unique<EndGenerationTemplateToken>(
+                location, pre_space, post_space));
           } else if (keyword == "set") {
-            static std::regex namespaced_var_regex(
-                R"((\w+)[\s\n\r]*\.[\s\n\r]*(\w+))");
+            static std::regex namespaced_var_regex(R"((\w+)\s*\.\s*(\w+))");
 
             std::string ns;
             std::vector<std::string> var_names;
@@ -2783,16 +2926,31 @@ class Parser {
             auto post_space = parseBlockClose();
             tokens.push_back(std::make_unique<EndFilterTemplateToken>(
                 location, pre_space, post_space));
+          } else if (keyword == "break" || keyword == "continue") {
+            auto post_space = parseBlockClose();
+            tokens.push_back(std::make_unique<LoopControlTemplateToken>(
+                location, pre_space, post_space,
+                keyword == "break" ? LoopControlType::Break
+                                   : LoopControlType::Continue));
           } else {
             throw std::runtime_error("Unexpected block: " + keyword);
           }
-        } else if (!(text = consumeToken(text_regex, SpaceHandling::Keep))
-                        .empty()) {
+        } else if (std::regex_search(it, end, match, non_text_open_regex)) {
+          if (!match.position()) {
+            if (match[0] != "{#")
+              throw std::runtime_error("Internal error: Expected a comment");
+            throw std::runtime_error("Missing end of comment tag");
+          }
+          auto text_end = it + match.position();
+          text = std::string(it, text_end);
+          it = text_end;
           tokens.push_back(std::make_unique<TextTemplateToken>(
               location, SpaceHandling::Keep, SpaceHandling::Keep, text));
         } else {
-          if (it != end)
-            throw std::runtime_error("Unexpected character");
+          text = std::string(it, end);
+          it = end;
+          tokens.push_back(std::make_unique<TextTemplateToken>(
+              location, SpaceHandling::Keep, SpaceHandling::Keep, text));
         }
       }
       return tokens;
@@ -2845,6 +3003,14 @@ class Parser {
             token->location, std::move(for_token->var_names),
             std::move(for_token->iterable), std::move(for_token->condition),
             std::move(body), for_token->recursive, std::move(else_body)));
+      } else if (dynamic_cast<GenerationTemplateToken*>(token.get())) {
+        auto body = parseTemplate(begin, it, end);
+        if (it == end ||
+            (*(it++))->type != TemplateToken::Type::EndGeneration) {
+          throw unterminated(**start);
+        }
+        // Treat as a no-op, as our scope is templates for inference, not training (`{% generation %}` wraps generated tokens for masking).
+        children.emplace_back(std::move(body));
       } else if (auto text_token =
                      dynamic_cast<TextTemplateToken*>(token.get())) {
         SpaceHandling pre_space =
@@ -2853,25 +3019,34 @@ class Parser {
             it != end ? (*it)->pre_space : SpaceHandling::Keep;
 
         auto text = text_token->text;
+        if (post_space == SpaceHandling::Strip) {
+          static std::regex trailing_space_regex(R"(\s+$)");
+          text = std::regex_replace(text, trailing_space_regex, "");
+        } else if (options.lstrip_blocks && it != end) {
+          auto i = text.size();
+          while (i > 0 && (text[i - 1] == ' ' || text[i - 1] == '\t'))
+            i--;
+          if ((i == 0 && (it - 1) == begin) || (i > 0 && text[i - 1] == '\n')) {
+            text.resize(i);
+          }
+        }
         if (pre_space == SpaceHandling::Strip) {
-          static std::regex leading_space_regex(R"(^(\s|\r|\n)+)");
+          static std::regex leading_space_regex(R"(^\s+)");
           text = std::regex_replace(text, leading_space_regex, "");
         } else if (options.trim_blocks && (it - 1) != begin &&
                    !dynamic_cast<ExpressionTemplateToken*>((*(it - 2)).get())) {
-          static std::regex leading_line(R"(^[ \t]*\r?\n)");
-          text = std::regex_replace(text, leading_line, "");
-        }
-        if (post_space == SpaceHandling::Strip) {
-          static std::regex trailing_space_regex(R"((\s|\r|\n)+$)");
-          text = std::regex_replace(text, trailing_space_regex, "");
-        } else if (options.lstrip_blocks && it != end) {
-          static std::regex trailing_last_line_space_regex(R"((\r?\n)[ \t]*$)");
-          text = std::regex_replace(text, trailing_last_line_space_regex, "$1");
+          if (text.length() > 0 && text[0] == '\n') {
+            text.erase(0, 1);
+          }
         }
-
         if (it == end && !options.keep_trailing_newline) {
-          static std::regex r(R"(\r?\n$)");
-          text = std::regex_replace(text, r, "");  // Strip one trailing newline
+          auto i = text.size();
+          if (i > 0 && text[i - 1] == '\n') {
+            i--;
+            if (i > 0 && text[i - 1] == '\r')
+              i--;
+            text.resize(i);
+          }
         }
         children.emplace_back(
             std::make_shared<TextNode>(token->location, text));
@@ -2920,12 +3095,17 @@ class Parser {
             token->location, std::move(filter_token->filter), std::move(body)));
       } else if (dynamic_cast<CommentTemplateToken*>(token.get())) {
         // Ignore comments
+      } else if (auto ctrl_token =
+                     dynamic_cast<LoopControlTemplateToken*>(token.get())) {
+        children.emplace_back(std::make_shared<LoopControlNode>(
+            token->location, ctrl_token->control_type));
       } else if (dynamic_cast<EndForTemplateToken*>(token.get()) ||
                  dynamic_cast<EndSetTemplateToken*>(token.get()) ||
                  dynamic_cast<EndMacroTemplateToken*>(token.get()) ||
                  dynamic_cast<EndFilterTemplateToken*>(token.get()) ||
                  dynamic_cast<EndIfTemplateToken*>(token.get()) ||
                  dynamic_cast<ElseTemplateToken*>(token.get()) ||
+                 dynamic_cast<EndGenerationTemplateToken*>(token.get()) ||
                  dynamic_cast<ElifTemplateToken*>(token.get())) {
         it--;   // unconsume the token
         break;  // exit the loop
@@ -2950,7 +3130,9 @@ class Parser {
  public:
   static std::shared_ptr<TemplateNode> parse(const std::string& template_str,
                                              const Options& options) {
-    Parser parser(std::make_shared<std::string>(template_str), options);
+    Parser parser(
+        std::make_shared<std::string>(normalize_newlines(template_str)),
+        options);
     auto tokens = parser.tokenize();
     TemplateTokenIterator begin = tokens.begin();
     auto it = begin;
@@ -3129,6 +3311,9 @@ inline std::shared_ptr<Context> Context::builtins() {
           "join", {"items", "d"},
           [](const std::shared_ptr<Context>&, Value& args) {
             auto do_join = [](Value& items, const std::string& sep) {
+              if (!items.is_array())
+                throw std::runtime_error("object is not iterable: " +
+                                         items.dump());
               std::ostringstream oss;
               auto first = true;
               for (size_t i = 0, n = items.size(); i < n; ++i) {
@@ -3161,7 +3346,7 @@ inline std::shared_ptr<Context> Context::builtins() {
                                                ArgumentsValue& args) {
                 auto ns = Value::object();
                 args.expectArgs("namespace", {0, 0},
-                                {0, std::numeric_limits<size_t>::max()});
+                                {0, (std::numeric_limits<size_t>::max)()});
                 for (auto& [name, value] : args.kwargs) {
                   ns.set(name, value);
                 }
@@ -3183,7 +3368,7 @@ inline std::shared_ptr<Context> Context::builtins() {
   globals.set("safe", simple_function("safe", {"value"},
                                       [](const std::shared_ptr<Context>&,
                                          Value& args) -> Value {
-                                        return args.at("value");
+                                        return args.at("value").to_str();
                                       }));
   globals.set("string", simple_function("string", {"value"},
                                         [](const std::shared_ptr<Context>&,
@@ -3234,35 +3419,42 @@ inline std::shared_ptr<Context> Context::builtins() {
           return filter.call(context, actual_args);
         });
   };
-  // https://jinja.palletsprojects.com/en/3.0.x/templates/#jinja-filters.reject
-  globals.set(
-      "reject", Value::callable([=](const std::shared_ptr<Context>& context,
-                                    ArgumentsValue& args) {
-        args.expectArgs("reject", {2, std::numeric_limits<size_t>::max()},
-                        {0, 0});
-        auto& items = args.args[0];
-        auto filter_fn = context->get(args.args[1]);
-        if (filter_fn.is_null())
-          throw std::runtime_error("Undefined filter: " + args.args[1].dump());
-
-        auto filter_args = Value::array();
-        for (size_t i = 2, n = args.args.size(); i < n; i++) {
-          filter_args.push_back(args.args[i]);
-        }
-        auto filter = make_filter(filter_fn, filter_args);
+  // Shared body of the select/reject filters; is_select picks the polarity.
+  auto select_or_reject = [make_filter](bool is_select) {
+    return Value::callable([=](const std::shared_ptr<Context>& context,
+                               ArgumentsValue& args) {
+      args.expectArgs(is_select ? "select" : "reject",
+                      {2, (std::numeric_limits<size_t>::max)()}, {0, 0});
+      auto& items = args.args[0];
+      if (items.is_null())
+        return Value::array();
+      if (!items.is_array())
+        throw std::runtime_error("object is not iterable: " + items.dump());
+
+      auto filter_fn = context->get(args.args[1]);
+      if (filter_fn.is_null())
+        throw std::runtime_error("Undefined filter: " + args.args[1].dump());
+
+      auto filter_args = Value::array();
+      for (size_t i = 2, n = args.args.size(); i < n; i++)
+        filter_args.push_back(args.args[i]);
+      auto filter = make_filter(filter_fn, filter_args);
 
-        auto res = Value::array();
-        for (size_t i = 0, n = items.size(); i < n; i++) {
-          auto& item = items.at(i);
-          ArgumentsValue filter_args;
-          filter_args.args.emplace_back(item);
-          auto pred_res = filter.call(context, filter_args);
-          if (!pred_res.to_bool()) {
-            res.push_back(item);
-          }
+      auto res = Value::array();
+      for (size_t i = 0, n = items.size(); i < n; i++) {
+        auto& item = items.at(i);
+        ArgumentsValue call_args;  // own name: must not shadow filter_args
+        call_args.args.emplace_back(item);
+        auto pred_res = filter.call(context, call_args);
+        if (pred_res.to_bool() == is_select) {
+          res.push_back(item);
         }
-        return res;
-      }));
+      }
+      return res;
+    });
+  };
+  globals.set("select", select_or_reject(/* is_select= */ true));
+  globals.set("reject", select_or_reject(/* is_select= */ false));
   globals.set(
       "map", Value::callable([=](const std::shared_ptr<Context>& context,
                                  ArgumentsValue& args) {
@@ -3322,45 +3514,51 @@ inline std::shared_ptr<Context> Context::builtins() {
                                   out += "\n";
                                 return out;
                               }));
-  globals.set(
-      "selectattr", Value::callable([=](const std::shared_ptr<Context>& context,
-                                        ArgumentsValue& args) {
-        args.expectArgs("selectattr", {2, std::numeric_limits<size_t>::max()},
-                        {0, 0});
-        auto& items = args.args[0];
-        if (items.is_null())
-          return Value::array();
-        auto attr_name = args.args[1].get<std::string>();
-
-        bool has_test = false;
-        Value test_fn;
-        ArgumentsValue test_args{{Value()}, {}};
-        if (args.args.size() >= 3) {
-          has_test = true;
-          test_fn = context->get(args.args[2]);
-          if (test_fn.is_null())
-            throw std::runtime_error("Undefined test: " + args.args[2].dump());
-          for (size_t i = 3, n = args.args.size(); i < n; i++) {
-            test_args.args.emplace_back(args.args[i]);
-          }
-          test_args.kwargs = args.kwargs;
+  // Shared body of selectattr/rejectattr; is_select picks which outcome keeps.
+  auto select_or_reject_attr = [](bool is_select) {
+    return Value::callable([=](const std::shared_ptr<Context>& context,
+                               ArgumentsValue& args) {
+      args.expectArgs(is_select ? "selectattr" : "rejectattr",
+                      {2, (std::numeric_limits<size_t>::max)()}, {0, 0});
+      auto& items = args.args[0];
+      if (items.is_null())
+        return Value::array();
+      if (!items.is_array())
+        throw std::runtime_error("object is not iterable: " + items.dump());
+      auto attr_name = args.args[1].get<std::string>();
+
+      bool has_test = false;
+      Value test_fn;
+      ArgumentsValue test_args{{Value()}, {}};
+      if (args.args.size() >= 3) {
+        has_test = true;
+        test_fn = context->get(args.args[2]);
+        if (test_fn.is_null())
+          throw std::runtime_error("Undefined test: " + args.args[2].dump());
+        for (size_t i = 3, n = args.args.size(); i < n; i++) {
+          test_args.args.emplace_back(args.args[i]);
         }
+        test_args.kwargs = args.kwargs;
+      }
 
-        auto res = Value::array();
-        for (size_t i = 0, n = items.size(); i < n; i++) {
-          auto& item = items.at(i);
-          auto attr = item.get(attr_name);
-          if (has_test) {
-            test_args.args[0] = attr;
-            if (test_fn.call(context, test_args).to_bool()) {
-              res.push_back(item);
-            }
-          } else {
-            res.push_back(attr);
+      auto res = Value::array();
+      for (size_t i = 0, n = items.size(); i < n; i++) {
+        auto& item = items.at(i);
+        auto attr = item.get(attr_name);
+        if (has_test) {
+          test_args.args[0] = attr;
+          if (test_fn.call(context, test_args).to_bool() == is_select) {
+            res.push_back(item);
           }
+        } else if (attr.to_bool() == is_select) {
+          res.push_back(item);  // no test: the attribute's truthiness decides
         }
-        return res;
-      }));
+      }
+      return res;
+    });
+  };
+  globals.set("selectattr", select_or_reject_attr(/* is_select= */ true));
+  globals.set("rejectattr", select_or_reject_attr(/* is_select= */ false));
   globals.set("range", Value::callable([=](const std::shared_ptr<Context>&,
                                            ArgumentsValue& args) {
                 std::vector<int64_t> startEndStep(3);
@@ -3425,4 +3623,4 @@ inline std::shared_ptr<Context> Context::make(
       values.is_null() ? Value::object() : std::move(values), parent);
 }
 
-}  // namespace minja
+}  // namespace minja
\ No newline at end of file

From ca68981c3f65fcc5913659d1bfd5af0d96455707 Mon Sep 17 00:00:00 2001
From: Akarshan Biswas <akarshan@menlo.ai>
Date: Mon, 24 Feb 2025 13:13:59 +0530
Subject: [PATCH 17/23] Format header includes

---
 engine/common/download_task_queue.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/engine/common/download_task_queue.h b/engine/common/download_task_queue.h
index 345f81512..2690284ea 100644
--- a/engine/common/download_task_queue.h
+++ b/engine/common/download_task_queue.h
@@ -1,3 +1,4 @@
+#include <algorithm>
 #include <condition_variable>
 #include <deque>
 #include <mutex>
@@ -6,7 +7,6 @@
 #include <string>
 #include <unordered_map>
 #include "common/download_task.h"
-#include <algorithm>
 
 class DownloadTaskQueue {
  private:

From a312585446eaf21206f3e30ccf8769cfa395edd9 Mon Sep 17 00:00:00 2001
From: Akarshan Biswas <akarshan@menlo.ai>
Date: Mon, 24 Feb 2025 13:16:40 +0530
Subject: [PATCH 18/23] Format includes on the rest of the files

---
 engine/repositories/assistant_fs_repository.cc | 2 +-
 engine/repositories/file_fs_repository.cc      | 2 +-
 engine/repositories/thread_fs_repository.cc    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/engine/repositories/assistant_fs_repository.cc b/engine/repositories/assistant_fs_repository.cc
index 30bed1663..290471b50 100644
--- a/engine/repositories/assistant_fs_repository.cc
+++ b/engine/repositories/assistant_fs_repository.cc
@@ -1,10 +1,10 @@
 #include "assistant_fs_repository.h"
 #include <json/reader.h>
+#include <algorithm>
 #include <filesystem>
 #include <fstream>
 #include <mutex>
 #include "utils/result.hpp"
-#include <algorithm>
 
 cpp::result<std::vector<OpenAi::Assistant>, std::string>
 AssistantFsRepository::ListAssistants(uint8_t limit, const std::string& order,
diff --git a/engine/repositories/file_fs_repository.cc b/engine/repositories/file_fs_repository.cc
index 3ddb90977..4ec6c1ab2 100644
--- a/engine/repositories/file_fs_repository.cc
+++ b/engine/repositories/file_fs_repository.cc
@@ -1,11 +1,11 @@
 #include "file_fs_repository.h"
 #include <json/reader.h>
+#include <algorithm>
 #include <filesystem>
 #include <fstream>
 #include "database/file.h"
 #include "utils/logging_utils.h"
 #include "utils/result.hpp"
-#include <algorithm>
 
 std::filesystem::path FileFsRepository::GetFilePath() const {
   return data_folder_path_ / kFileContainerFolderName;
diff --git a/engine/repositories/thread_fs_repository.cc b/engine/repositories/thread_fs_repository.cc
index 36650be76..06fdd1028 100644
--- a/engine/repositories/thread_fs_repository.cc
+++ b/engine/repositories/thread_fs_repository.cc
@@ -1,9 +1,9 @@
 #include "thread_fs_repository.h"
+#include <algorithm>
 #include <fstream>
 #include <mutex>
 #include "common/assistant.h"
 #include "utils/result.hpp"
-#include <algorithm>
 
 cpp::result<std::vector<OpenAi::Thread>, std::string>
 ThreadFsRepository::ListThreads(uint8_t limit, const std::string& order,

From 752c2c51d52ae0a746fcf3772e897fad1cea5d10 Mon Sep 17 00:00:00 2001
From: vansangpfiev <vansangpfiev@gmail.com>
Date: Tue, 25 Feb 2025 08:55:00 +0700
Subject: [PATCH 19/23] fix: add filter by tag for repository list (#2021)

Co-authored-by: sangjanai <sang@jan.ai>
---
 engine/controllers/models.cc            |  5 ++--
 engine/controllers/models.h             |  7 +++--
 engine/services/model_source_service.cc | 38 ++++++++++++++-----------
 engine/services/model_source_service.h  | 25 ++++++++++------
 4 files changed, 46 insertions(+), 29 deletions(-)

diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc
index afcedffd7..44350d3d1 100644
--- a/engine/controllers/models.cc
+++ b/engine/controllers/models.cc
@@ -859,10 +859,11 @@ void Models::GetModelSource(
 void Models::GetRepositoryList(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback,
-    std::optional<std::string> author) {
+    std::optional<std::string> author, std::optional<std::string> tag) {
   if (!author.has_value())
     author = "cortexso";
-  auto res = model_src_svc_->GetRepositoryList(author.value());
+  auto res =
+      model_src_svc_->GetRepositoryList(author.value(), tag.value_or(""));
   if (res.has_error()) {
     Json::Value ret;
     ret["message"] = res.error();
diff --git a/engine/controllers/models.h b/engine/controllers/models.h
index f21b50ea1..a605be761 100644
--- a/engine/controllers/models.h
+++ b/engine/controllers/models.h
@@ -44,8 +44,8 @@ class Models : public drogon::HttpController<Models, false> {
   ADD_METHOD_TO(Models::DeleteModelSource, "/v1/models/sources", Delete);
   ADD_METHOD_TO(Models::GetModelSources, "/v1/models/sources", Get);
   ADD_METHOD_TO(Models::GetModelSource, "/v1/models/sources/{src}", Get);
-  ADD_METHOD_TO(Models::GetRepositoryList, "/v1/models/hub?author={author}",
-                Get);
+  ADD_METHOD_TO(Models::GetRepositoryList,
+                "/v1/models/hub?author={author}&tag={tag}", Get);
   METHOD_LIST_END
 
   explicit Models(std::shared_ptr<DatabaseService> db_service,
@@ -115,7 +115,8 @@ class Models : public drogon::HttpController<Models, false> {
 
   void GetRepositoryList(const HttpRequestPtr& req,
                          std::function<void(const HttpResponsePtr&)>&& callback,
-                         std::optional<std::string> author);
+                         std::optional<std::string> author,
+                         std::optional<std::string> tag);
 
  private:
   std::shared_ptr<DatabaseService> db_service_;
diff --git a/engine/services/model_source_service.cc b/engine/services/model_source_service.cc
index 5298aa75c..59275e8db 100644
--- a/engine/services/model_source_service.cc
+++ b/engine/services/model_source_service.cc
@@ -14,17 +14,6 @@
 namespace hu = huggingface_utils;
 
 namespace {
-struct ModelInfo {
-  std::string id;
-  int likes;
-  int trending_score;
-  bool is_private;
-  int downloads;
-  std::vector<std::string> tags;
-  std::string created_at;
-  std::string model_id;
-};
-
 std::vector<ModelInfo> ParseJsonString(const std::string& json_str) {
   std::vector<ModelInfo> models;
 
@@ -201,20 +190,37 @@ cpp::result<ModelSource, std::string> ModelSourceService::GetModelSource(
 }
 
 cpp::result<std::vector<std::string>, std::string>
-ModelSourceService::GetRepositoryList(std::string_view author) {
+ModelSourceService::GetRepositoryList(std::string_view author,
+                                      std::string_view tag_filter) {
   std::string as(author);
+  auto get_repo_list = [this, &as, &tag_filter] {
+    std::vector<std::string> repo_list;
+    // find(), not at(): an author with no cached entry gives an empty list.
+    auto const cached = cortexso_repos_.find(as);
+    if (cached == cortexso_repos_.end())
+      return repo_list;
+    for (auto const& mi : cached->second) {
+      // An empty filter keeps every repository; otherwise the tag must match.
+      if (tag_filter.empty() || std::find(mi.tags.begin(), mi.tags.end(),
+                                          tag_filter) != mi.tags.end())
+        repo_list.push_back(mi.id);
+    }
+    return repo_list;
+  };
   if (cortexso_repos_.find(as) != cortexso_repos_.end() &&
-      !cortexso_repos_.at(as).empty())
-    return cortexso_repos_.at(as);
+      !cortexso_repos_.at(as).empty()) {
+    return get_repo_list();
+  }
+
   const auto begin = std::chrono::high_resolution_clock::now();
   auto res =
       curl_utils::SimpleGet("https://huggingface.co/api/models?author=" + as);
   if (res.has_value()) {
     auto repos = ParseJsonString(res.value());
     for (auto& r : repos) {
-      cortexso_repos_[as].push_back(r.id);
+      cortexso_repos_[as].push_back(r);
     }
-    return cortexso_repos_.at(as);
+    return get_repo_list();
   } else {
     return cpp::fail(res.error());
   }
diff --git a/engine/services/model_source_service.h b/engine/services/model_source_service.h
index 40ca1f887..cffe93bb9 100644
--- a/engine/services/model_source_service.h
+++ b/engine/services/model_source_service.h
@@ -37,6 +37,17 @@ struct ModelSource {
   };
 };
 
+struct ModelInfo {  // one entry of the per-author cache in cortexso_repos_
+  std::string id;  // value returned to callers of GetRepositoryList
+  int likes{0};
+  int trending_score{0};
+  bool is_private{false};
+  int downloads{0};
+  std::vector<std::string> tags;  // matched against the tag filter
+  std::string created_at;
+  std::string model_id;
+};
+
 class ModelSourceService {
  public:
   explicit ModelSourceService(std::shared_ptr<DatabaseService> db_service);
@@ -54,7 +65,7 @@ class ModelSourceService {
   cpp::result<ModelSource, std::string> GetModelSource(const std::string& src);
 
   cpp::result<std::vector<std::string>, std::string> GetRepositoryList(
-      std::string_view author);
+      std::string_view author, std::string_view tag_filter);
 
  private:
   cpp::result<bool, std::string> AddHfOrg(const std::string& model_source,
@@ -75,12 +86,10 @@ class ModelSourceService {
       const std::string& model_source, const std::string& author,
       const std::string& model_name);
 
-  cpp::result<std::string, std::string>
-  AddCortexsoRepoBranch(const std::string& model_source,
-                        const std::string& author,
-                        const std::string& model_name,
-                        const std::string& branch, const std::string& metadata,
-                        const std::string& desc);
+  cpp::result<std::string, std::string> AddCortexsoRepoBranch(
+      const std::string& model_source, const std::string& author,
+      const std::string& model_name, const std::string& branch,
+      const std::string& metadata, const std::string& desc);
 
   void SyncModelSource();
 
@@ -89,5 +98,5 @@ class ModelSourceService {
   std::thread sync_db_thread_;
   std::atomic<bool> running_;
 
-  std::unordered_map<std::string, std::vector<std::string>> cortexso_repos_;
+  std::unordered_map<std::string, std::vector<ModelInfo>> cortexso_repos_;
 };
\ No newline at end of file

From 54432adbbc9821ac0cca320fc436f5401fc76471 Mon Sep 17 00:00:00 2001
From: vansangpfiev <vansangpfiev@gmail.com>
Date: Tue, 25 Feb 2025 08:55:24 +0700
Subject: [PATCH 20/23] fix: block `command` field changes for python engine
 (#2007)

Co-authored-by: sangjanai <sang@jan.ai>
---
 engine/controllers/models.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc
index 44350d3d1..7439a5df5 100644
--- a/engine/controllers/models.cc
+++ b/engine/controllers/models.cc
@@ -385,6 +385,10 @@ void Models::UpdateModel(const HttpRequestPtr& req,
       message = "Successfully update model ID '" + model_id +
                 "': " + json_body.toStyledString();
     } else if (model_config.engine == kPythonEngine) {
+      // Block changes to `command`
+      if (json_body.isMember("command")) {
+        json_body.removeMember("command");
+      }
       config::PythonModelConfig python_model_config;
       python_model_config.ReadFromYaml(yaml_fp.string());
       python_model_config.FromJson(json_body);

From a84b935221a3cd48fae125bcef9d0d0870c932d3 Mon Sep 17 00:00:00 2001
From: vansangpfiev <vansangpfiev@gmail.com>
Date: Tue, 25 Feb 2025 08:55:41 +0700
Subject: [PATCH 21/23] fix: filter out Intel GPUs (#2015)

Co-authored-by: sangjanai <sang@jan.ai>
---
 engine/utils/hardware/gpu/vulkan/vulkan_gpu.h | 36 +++++++++++--------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h b/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h
index bf1157931..4969794d1 100644
--- a/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h
+++ b/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h
@@ -24,15 +24,20 @@
 #endif
 
 namespace cortex::hw {
+inline constexpr uint32_t NVIDIA_VENDOR = 0x10DE;  // vendor ids compared with
+inline constexpr uint32_t AMD_VENDOR = 0x1002;     // device_properties.vendorID
+inline constexpr uint32_t INTEL_VENDOR = 0x8086;
+inline constexpr uint32_t ARM_VENDOR = 0x13B5;
+
 inline std::string GetVendorStr(uint32_t vendor_id) {
   switch (vendor_id) {
-    case 0x1002:
+    case AMD_VENDOR:
       return "AMD";
-    case 0x10DE:
+    case NVIDIA_VENDOR:
       return "NVIDIA";
-    case 0x8086:
+    case INTEL_VENDOR:
       return "INTEL";
-    case 0x13B5:
+    case ARM_VENDOR:
       return "ARM";
     default:
       return std::to_string(vendor_id);
@@ -441,16 +446,19 @@ class VulkanGpu {
 #endif
       int free_vram_MiB =
           total_vram_MiB > used_vram_MiB ? total_vram_MiB - used_vram_MiB : 0;
-      gpus.emplace_back(cortex::hw::GPU{
-          .id = std::to_string(id),
-          .device_id = device_properties.deviceID,
-          .name = device_properties.deviceName,
-          .version = std::to_string(device_properties.driverVersion),
-          .add_info = cortex::hw::AmdAddInfo{},
-          .free_vram = free_vram_MiB,
-          .total_vram = total_vram_MiB,
-          .uuid = uuid_to_string(device_id_properties.deviceUUID),
-          .vendor = GetVendorStr(device_properties.vendorID)});
+      if (device_properties.vendorID == NVIDIA_VENDOR ||
+          device_properties.vendorID == AMD_VENDOR) {
+        gpus.emplace_back(cortex::hw::GPU{
+            .id = std::to_string(id),
+            .device_id = device_properties.deviceID,
+            .name = device_properties.deviceName,
+            .version = std::to_string(device_properties.driverVersion),
+            .add_info = cortex::hw::AmdAddInfo{},
+            .free_vram = free_vram_MiB,
+            .total_vram = total_vram_MiB,
+            .uuid = uuid_to_string(device_id_properties.deviceUUID),
+            .vendor = GetVendorStr(device_properties.vendorID)});
+      }
       id++;
     }
 

From eddc1d512dc14a7d5784ffa42fd3ec60c2e889e8 Mon Sep 17 00:00:00 2001
From: vansangpfiev <vansangpfiev@gmail.com>
Date: Tue, 25 Feb 2025 09:28:33 +0700
Subject: [PATCH 22/23] chore: unit test (#2025)

Co-authored-by: sangjanai <sang@jan.ai>
---
 engine/e2e-test/test_api_docker.py               | 2 +-
 engine/test/components/test_huggingface_utils.cc | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/engine/e2e-test/test_api_docker.py b/engine/e2e-test/test_api_docker.py
index 7276f2078..4757c1550 100644
--- a/engine/e2e-test/test_api_docker.py
+++ b/engine/e2e-test/test_api_docker.py
@@ -2,7 +2,7 @@
 import requests
 from utils.test_runner import wait_for_websocket_download_success_event
 
-repo_branches = ["tinyllama:1b-gguf"]
+repo_branches = ["tinyllama:gguf"]
 
 
 class TestCortexsoModels:
diff --git a/engine/test/components/test_huggingface_utils.cc b/engine/test/components/test_huggingface_utils.cc
index 8377200e5..46701d0b0 100644
--- a/engine/test/components/test_huggingface_utils.cc
+++ b/engine/test/components/test_huggingface_utils.cc
@@ -10,8 +10,8 @@ TEST_F(HuggingFaceUtilTestSuite, TestGetModelRepositoryBranches) {
   EXPECT_GE(branches.value().size(), 3);
   EXPECT_EQ(branches.value()["main"].name, "main");
   EXPECT_EQ(branches.value()["main"].ref, "refs/heads/main");
-  EXPECT_EQ(branches.value()["1b-gguf"].name, "1b-gguf");
-  EXPECT_EQ(branches.value()["1b-gguf"].ref, "refs/heads/1b-gguf");
+  EXPECT_EQ(branches.value()["1b"].name, "1b");
+  EXPECT_EQ(branches.value()["1b"].ref, "refs/heads/1b");
   EXPECT_EQ(branches.value()["gguf"].name, "gguf");
   EXPECT_EQ(branches.value()["gguf"].ref, "refs/heads/gguf");
 }

From 75ad69c100324a22dce25512fe10f493b788c924 Mon Sep 17 00:00:00 2001
From: vansangpfiev <vansangpfiev@gmail.com>
Date: Tue, 25 Feb 2025 11:05:17 +0700
Subject: [PATCH 23/23] chore: update tests (#2027)

Co-authored-by: sangjanai <sang@jan.ai>
---
 docs/static/openapi/cortex.json                    |  2 +-
 engine/e2e-test/api/model/test_api_model.py        | 14 +++++++-------
 engine/e2e-test/api/model/test_api_model_import.py |  6 +++---
 engine/e2e-test/cli/model/test_cli_model.py        | 10 +++++-----
 engine/e2e-test/local_test.py                      | 10 +++++-----
 engine/e2e-test/test_api_docker.py                 |  2 +-
 engine/e2e-test/utils/test_runner.py               |  2 +-
 engine/test/components/test_event.cc               |  2 +-
 engine/test/components/test_huggingface_utils.cc   |  7 +++----
 9 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json
index 2deb15e5e..8f378a83f 100644
--- a/docs/static/openapi/cortex.json
+++ b/docs/static/openapi/cortex.json
@@ -5356,7 +5356,7 @@
             "type": "string",
             "description": "The identifier or URL of the model to use. It can be a model ID on Cortexso (https://huggingface.co/cortexso) or a HuggingFace URL pointing to the model file. For example: 'gpt2' or 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf'",
             "examples": [
-              "tinyllama:gguf",
+              "tinyllama:1b",
               "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf"
             ]
           },
diff --git a/engine/e2e-test/api/model/test_api_model.py b/engine/e2e-test/api/model/test_api_model.py
index fac94a9f1..bacf7e1b0 100644
--- a/engine/e2e-test/api/model/test_api_model.py
+++ b/engine/e2e-test/api/model/test_api_model.py
@@ -95,14 +95,14 @@ async def test_models_start_stop_should_be_successful(self):
         time.sleep(30)
 
         print("Pull model")
-        json_body = {"model": "tinyllama:gguf"}
+        json_body = {"model": "tinyllama:1b"}
         response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
-        assert response.status_code == 200, f"Failed to pull model: tinyllama:gguf"
+        assert response.status_code == 200, f"Failed to pull model: tinyllama:1b"
         await wait_for_websocket_download_success_event(timeout=None)
         
         # get API
         print("Get model")
-        response = requests.get("http://localhost:3928/v1/models/tinyllama:gguf")
+        response = requests.get("http://localhost:3928/v1/models/tinyllama:1b")
         assert response.status_code == 200
         
         # list API
@@ -111,7 +111,7 @@ async def test_models_start_stop_should_be_successful(self):
         assert response.status_code == 200
 
         print("Start model")
-        json_body = {"model": "tinyllama:gguf"}
+        json_body = {"model": "tinyllama:1b"}
         response = requests.post(
             "http://localhost:3928/v1/models/start", json=json_body
         )
@@ -123,13 +123,13 @@ async def test_models_start_stop_should_be_successful(self):
                 
         # update API
         print("Update model")
-        body_json = {'model': 'tinyllama:gguf'}
-        response = requests.patch("http://localhost:3928/v1/models/tinyllama:gguf", json = body_json)        
+        body_json = {'model': 'tinyllama:1b'}
+        response = requests.patch("http://localhost:3928/v1/models/tinyllama:1b", json = body_json)        
         assert response.status_code == 200
 
         # delete API
         print("Delete model")
-        response = requests.delete("http://localhost:3928/v1/models/tinyllama:gguf")
+        response = requests.delete("http://localhost:3928/v1/models/tinyllama:1b")
         assert response.status_code == 200
         
     def test_models_sources_api(self):
diff --git a/engine/e2e-test/api/model/test_api_model_import.py b/engine/e2e-test/api/model/test_api_model_import.py
index 46a22e59e..34746dbe9 100644
--- a/engine/e2e-test/api/model/test_api_model_import.py
+++ b/engine/e2e-test/api/model/test_api_model_import.py
@@ -16,14 +16,14 @@ def setup_and_teardown(self):
 
     @pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.")
     def test_model_import_should_be_success(self):
-        body_json = {'model': 'tinyllama:gguf',
+        body_json = {'model': 'tinyllama:1b',
                      'modelPath': '/path/to/local/gguf'}
         response = requests.post("http://localhost:3928/v1/models/import", json=body_json)              
         assert response.status_code == 200
 
     @pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.")
     def test_model_import_with_name_should_be_success(self):
-        body_json = {'model': 'tinyllama:gguf',
+        body_json = {'model': 'tinyllama:1b',
                      'modelPath': '/path/to/local/gguf',
                      'name': 'test_model'}
         response = requests.post("http://localhost:3928/v1/models/import", json=body_json)
@@ -45,7 +45,7 @@ def test_model_import_with_name_should_be_success(self):
         assert response.json()['files'][0] != '/path/to/local/gguf'
 
     def test_model_import_with_invalid_path_should_fail(self):
-        body_json = {'model': 'tinyllama:gguf',
+        body_json = {'model': 'tinyllama:1b',
                      'modelPath': '/invalid/path/to/gguf'}
         response = requests.post("http://localhost:3928/v1/models/import", json=body_json)
         assert response.status_code == 400
diff --git a/engine/e2e-test/cli/model/test_cli_model.py b/engine/e2e-test/cli/model/test_cli_model.py
index 63261c214..8577b3a58 100644
--- a/engine/e2e-test/cli/model/test_cli_model.py
+++ b/engine/e2e-test/cli/model/test_cli_model.py
@@ -22,7 +22,7 @@ def setup_and_teardown(self):
 
         # Teardown
         # Clean up
-        run("Delete model", ["models", "delete", "tinyllama:gguf"])
+        run("Delete model", ["models", "delete", "tinyllama:1b"])
         stop_server()
         
     def test_model_pull_with_direct_url_should_be_success(self):
@@ -40,13 +40,13 @@ def test_model_pull_with_direct_url_should_be_success(self):
         
     @pytest.mark.asyncio
     async def test_models_delete_should_be_successful(self):
-        json_body = {"model": "tinyllama:gguf"}
+        json_body = {"model": "tinyllama:1b"}
         response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
-        assert response.status_code == 200, f"Failed to pull model: tinyllama:gguf"
+        assert response.status_code == 200, f"Failed to pull model: tinyllama:1b"
         await wait_for_websocket_download_success_event(timeout=None)
 
         exit_code, output, error = run(
-            "Delete model", ["models", "delete", "tinyllama:gguf"]
+            "Delete model", ["models", "delete", "tinyllama:1b"]
         )
-        assert "Model tinyllama:gguf deleted successfully" in output
+        assert "Model tinyllama:1b deleted successfully" in output
         assert exit_code == 0, f"Model does not exist: {error}"
\ No newline at end of file
diff --git a/engine/e2e-test/local_test.py b/engine/e2e-test/local_test.py
index be0905e66..3169af81f 100644
--- a/engine/e2e-test/local_test.py
+++ b/engine/e2e-test/local_test.py
@@ -9,7 +9,7 @@
 import websockets
 
 # Define a list of request configurations
-model_id = "tinyllama:gguf"
+model_id = "tinyllama:1b"
 
 
 def make_request(config):
@@ -78,7 +78,7 @@ def get_setup_configs(host_port):
             "method": "POST",
             "url": "http://" + host_port + "/v1/models/pull",
             "headers": {"Content-Type": "application/json"},
-            "data": {"model": "tinyllama:gguf"},
+            "data": {"model": "tinyllama:1b"},
         },
         {
             "method": "POST",
@@ -89,14 +89,14 @@ def get_setup_configs(host_port):
             "method": "POST",
             "url": "http://" + host_port + "/v1/models/start",
             "headers": {"Content-Type": "application/json"},
-            "data": {"model": "tinyllama:gguf"},
+            "data": {"model": "tinyllama:1b"},
         },
         {
             "method": "POST",
             "url": "http://" + host_port + "/v1/chat/completions",
             "headers": {"Content-Type": "application/json"},
             "data": {
-                "model": "tinyllama:gguf",
+                "model": "tinyllama:1b",
                 "stream": True,
                 "messages": [{"content": "How are you today?", "role": "user"}],
                 "max_tokens": 256,
@@ -111,7 +111,7 @@ def get_teardown_configs(host_port):
             "method": "POST",
             "url": "http://" + host_port + "/v1/models/stop",
             "headers": {"Content-Type": "application/json"},
-            "data": {"model": "tinyllama:gguf"},
+            "data": {"model": "tinyllama:1b"},
         },
         {
             "method": "DELETE",
diff --git a/engine/e2e-test/test_api_docker.py b/engine/e2e-test/test_api_docker.py
index 4757c1550..8089289c8 100644
--- a/engine/e2e-test/test_api_docker.py
+++ b/engine/e2e-test/test_api_docker.py
@@ -2,7 +2,7 @@
 import requests
 from utils.test_runner import wait_for_websocket_download_success_event
 
-repo_branches = ["tinyllama:gguf"]
+repo_branches = ["tinyllama:1b"]
 
 
 class TestCortexsoModels:
diff --git a/engine/e2e-test/utils/test_runner.py b/engine/e2e-test/utils/test_runner.py
index dfc515df7..f25fc2bc0 100644
--- a/engine/e2e-test/utils/test_runner.py
+++ b/engine/e2e-test/utils/test_runner.py
@@ -90,7 +90,7 @@ def start_server_if_needed():
         start_server()
         
 
-def pull_model_if_needed(model_id: str = "tinyllama:gguf"):
+def pull_model_if_needed(model_id: str = "tinyllama:1b"):
     """
     Pull the model if it is not already pulled.
     """
diff --git a/engine/test/components/test_event.cc b/engine/test/components/test_event.cc
index baa5fd16b..14b3d413a 100644
--- a/engine/test/components/test_event.cc
+++ b/engine/test/components/test_event.cc
@@ -9,7 +9,7 @@ TEST_F(EventTest, EventFromString) {
   // clang-format off
   std::string ev_str = R"({
     "task": {
-      "id": "tinyllama:gguf",
+      "id": "tinyllama:1b",
       "items": [
         {
           "bytes": 668788096,
diff --git a/engine/test/components/test_huggingface_utils.cc b/engine/test/components/test_huggingface_utils.cc
index 46701d0b0..a4e00a2bf 100644
--- a/engine/test/components/test_huggingface_utils.cc
+++ b/engine/test/components/test_huggingface_utils.cc
@@ -7,17 +7,16 @@ TEST_F(HuggingFaceUtilTestSuite, TestGetModelRepositoryBranches) {
   auto branches =
       huggingface_utils::GetModelRepositoryBranches("cortexso", "tinyllama");
 
-  EXPECT_GE(branches.value().size(), 3);
+  EXPECT_GE(branches.value().size(), 1);
   EXPECT_EQ(branches.value()["main"].name, "main");
   EXPECT_EQ(branches.value()["main"].ref, "refs/heads/main");
   EXPECT_EQ(branches.value()["1b"].name, "1b");
   EXPECT_EQ(branches.value()["1b"].ref, "refs/heads/1b");
-  EXPECT_EQ(branches.value()["gguf"].name, "gguf");
-  EXPECT_EQ(branches.value()["gguf"].ref, "refs/heads/gguf");
 }
 
 // TODO(sang) re-enable when main branch is fixed
-TEST_F(HuggingFaceUtilTestSuite, DISABLED_TestGetHuggingFaceModelRepoInfoSuccessfully) {
+TEST_F(HuggingFaceUtilTestSuite,
+       DISABLED_TestGetHuggingFaceModelRepoInfoSuccessfully) {
   auto model_info =
       huggingface_utils::GetHuggingFaceModelRepoInfo("cortexso", "tinyllama");