From ff1727f2fab79b6b13eea4698e6d5fd6f6cb9cdd Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Wed, 27 Aug 2025 15:09:31 +0530 Subject: [PATCH 1/2] fix: fetch_models_task fixed for files and docs added Signed-off-by: Anupam Kumar --- nc_py_api/__init__.py | 1 + nc_py_api/_exceptions.py | 4 + nc_py_api/ex_app/integration_fastapi.py | 156 +++++++++++++++--------- 3 files changed, 103 insertions(+), 58 deletions(-) diff --git a/nc_py_api/__init__.py b/nc_py_api/__init__.py index 8fad55db..e20f647f 100644 --- a/nc_py_api/__init__.py +++ b/nc_py_api/__init__.py @@ -2,6 +2,7 @@ from . import ex_app, options from ._exceptions import ( + ModelFetchError, NextcloudException, NextcloudExceptionNotFound, NextcloudMissingCapabilities, diff --git a/nc_py_api/_exceptions.py b/nc_py_api/_exceptions.py index c9365e46..a366c239 100644 --- a/nc_py_api/_exceptions.py +++ b/nc_py_api/_exceptions.py @@ -65,3 +65,7 @@ def check_error(response: Response, info: str = ""): response.raise_for_status() except HTTPError as e: raise NextcloudException(status_code, reason=response.reason, info=info) from e + + +class ModelFetchError(Exception): + """Exception raised when model fetching fails.""" diff --git a/nc_py_api/ex_app/integration_fastapi.py b/nc_py_api/ex_app/integration_fastapi.py index a1017fcc..19c81c77 100644 --- a/nc_py_api/ex_app/integration_fastapi.py +++ b/nc_py_api/ex_app/integration_fastapi.py @@ -7,6 +7,7 @@ import json import os import typing +from traceback import format_exc from urllib.parse import urlparse import niquests @@ -22,10 +23,10 @@ from starlette.requests import HTTPConnection, Request from starlette.types import ASGIApp, Receive, Scope, Send +from .._exceptions import ModelFetchError from .._misc import get_username_secret_from_headers from ..nextcloud import AsyncNextcloudApp, NextcloudApp from ..talk_bot import TalkBotMessage -from .defs import LogLvl from .misc import persistent_storage @@ -70,9 +71,24 @@ def set_handlers( .. note:: When this parameter is ``False``, the provision of ``models_to_fetch`` is not allowed. - :param models_to_fetch: Dictionary describing which models should be downloaded during `init`. + :param models_to_fetch: Dictionary describing which models should be downloaded during `init` of the form: + .. code-block:: python + { + "model_url_1": { + "save_path": "path_or_filename_to_save_the_model_to", + }, + "huggingface_model_name_1": { + "max_workers": 4, + "cache_dir": "path_to_cache_dir", + "revision": "revision_to_fetch", + ... + }, + ... + } + .. note:: ``huggingface_hub`` package should be present for automatic models fetching. + All model options are optional and can be left empty. :param map_app_static: Should be folders ``js``, ``css``, ``l10n``, ``img`` automatically mounted in FastAPI or not. @@ -121,74 +137,98 @@ def __map_app_static_folders(fast_api_app: FastAPI): def fetch_models_task(nc: NextcloudApp, models: dict[str, dict], progress_init_start_value: int) -> None: - """Use for cases when you want to define custom `/init` but still need to easy download models.""" + """Use for cases when you want to define custom `/init` but still need to easy download models. + + :param nc: NextcloudApp instance. + :param models_to_fetch: Dictionary describing which models should be downloaded of the form: + .. code-block:: python + { + "model_url_1": { + "save_path": "path_or_filename_to_save_the_model_to", + }, + "huggingface_model_name_1": { + "max_workers": 4, + "cache_dir": "path_to_cache_dir", + "revision": "revision_to_fetch", + ... + }, + ... + } + + .. note:: ``huggingface_hub`` package should be present for automatic models fetching. + All model options are optional and can be left empty. + + :param progress_init_start_value: Integer value defining from which percent the progress should start. + + :raises ModelFetchError: in case of a model download error. + :raises NextcloudException: in case of a network error reaching the Nextcloud server. + """ if models: current_progress = progress_init_start_value percent_for_each = min(int((100 - progress_init_start_value) / len(models)), 99) for model in models: - if model.startswith(("http://", "https://")): - models[model]["path"] = __fetch_model_as_file( - current_progress, percent_for_each, nc, model, models[model] - ) - else: - models[model]["path"] = __fetch_model_as_snapshot( - current_progress, percent_for_each, nc, model, models[model] - ) - current_progress += percent_for_each + try: + if model.startswith(("http://", "https://")): + models[model]["path"] = __fetch_model_as_file( + current_progress, percent_for_each, nc, model, models[model] + ) + else: + models[model]["path"] = __fetch_model_as_snapshot( + current_progress, percent_for_each, nc, model, models[model] + ) + current_progress += percent_for_each + except BaseException as e: # noqa pylint: disable=broad-exception-caught + nc.set_init_status(current_progress, f"Downloading of '{model}' failed: {e}: {format_exc()}") + raise ModelFetchError(f"Downloading of '{model}' failed.") from e nc.set_init_status(100) def __fetch_model_as_file( current_progress: int, progress_for_task: int, nc: NextcloudApp, model_path: str, download_options: dict -) -> str | None: +) -> str: result_path = download_options.pop("save_path", urlparse(model_path).path.split("/")[-1]) - try: - - with niquests.get("GET", model_path, stream=True) as response: - if not response.is_success: - nc.log(LogLvl.ERROR, f"Downloading of '{model_path}' returned {response.status_code} status.") - return None - downloaded_size = 0 - linked_etag = "" - for each_history in response.history: - linked_etag = each_history.headers.get("X-Linked-ETag", "") - if linked_etag: - break - if not linked_etag: - linked_etag = response.headers.get("X-Linked-ETag", response.headers.get("ETag", "")) - total_size = int(response.headers.get("Content-Length")) - try: - existing_size = os.path.getsize(result_path) - except OSError: - existing_size = 0 - if linked_etag and total_size == existing_size: - with builtins.open(result_path, "rb") as file: - sha256_hash = hashlib.sha256() - for byte_block in iter(lambda: file.read(4096), b""): - sha256_hash.update(byte_block) - if f'"{sha256_hash.hexdigest()}"' == linked_etag: - nc.set_init_status(min(current_progress + progress_for_task, 99)) - return None - - with builtins.open(result_path, "wb") as file: - last_progress = current_progress - for chunk in response.iter_raw(-1): - downloaded_size += file.write(chunk) - if total_size: - new_progress = min(current_progress + int(progress_for_task * downloaded_size / total_size), 99) - if new_progress != last_progress: - nc.set_init_status(new_progress) - last_progress = new_progress - - return result_path - except Exception as e: # noqa pylint: disable=broad-exception-caught - nc.log(LogLvl.ERROR, f"Downloading of '{model_path}' raised an exception: {e}") - - return None + with niquests.get(model_path, stream=True) as response: + if not response.ok: + raise ModelFetchError( + f"Downloading of '{model_path}' failed, returned ({response.status_code}) {response.text}" + ) + downloaded_size = 0 + linked_etag = "" + for each_history in response.history: + linked_etag = each_history.headers.get("X-Linked-ETag", "") + if linked_etag: + break + if not linked_etag: + linked_etag = response.headers.get("X-Linked-ETag", response.headers.get("ETag", "")) + total_size = int(response.headers.get("Content-Length")) + try: + existing_size = os.path.getsize(result_path) + except OSError: + existing_size = 0 + if linked_etag and total_size == existing_size: + with builtins.open(result_path, "rb") as file: + sha256_hash = hashlib.sha256() + for byte_block in iter(lambda: file.read(4096), b""): + sha256_hash.update(byte_block) + if f'"{sha256_hash.hexdigest()}"' == linked_etag: + nc.set_init_status(min(current_progress + progress_for_task, 99)) + return result_path + + with builtins.open(result_path, "wb") as file: + last_progress = current_progress + for chunk in response.iter_raw(-1): + downloaded_size += file.write(chunk) + if total_size: + new_progress = min(current_progress + int(progress_for_task * downloaded_size / total_size), 99) + if new_progress != last_progress: + nc.set_init_status(new_progress) + last_progress = new_progress + + return result_path def __fetch_model_as_snapshot( - current_progress: int, progress_for_task, nc: NextcloudApp, mode_name: str, download_options: dict + current_progress: int, progress_for_task, nc: NextcloudApp, model_name: str, download_options: dict ) -> str: from huggingface_hub import snapshot_download # noqa isort:skip pylint: disable=C0415 disable=E0401 from tqdm import tqdm # noqa isort:skip pylint: disable=C0415 disable=E0401 @@ -201,7 +241,7 @@ def display(self, msg=None, pos=None): workers = download_options.pop("max_workers", 2) cache = download_options.pop("cache_dir", persistent_storage()) return snapshot_download( - mode_name, tqdm_class=TqdmProgress, **download_options, max_workers=workers, cache_dir=cache + model_name, tqdm_class=TqdmProgress, **download_options, max_workers=workers, cache_dir=cache ) From 39766843108275607052b017d86c49d52844d47a Mon Sep 17 00:00:00 2001 From: bigcat88 Date: Wed, 27 Aug 2025 15:18:23 +0300 Subject: [PATCH 2/2] remove tests with invalid model paths Signed-off-by: bigcat88 --- tests/_install_init_handler_models.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/tests/_install_init_handler_models.py b/tests/_install_init_handler_models.py index b96c87a6..7d233ca5 100644 --- a/tests/_install_init_handler_models.py +++ b/tests/_install_init_handler_models.py @@ -5,11 +5,9 @@ from nc_py_api import NextcloudApp, ex_app -INVALID_URL = "https://invalid_url" -MODEL_NAME1 = "MBZUAI/LaMini-T5-61M" +# TO-DO: add tests when ExApp fails to initialize due to invalid model fetch MODEL_NAME2 = "https://huggingface.co/MBZUAI/LaMini-T5-61M/resolve/main/pytorch_model.bin" MODEL_NAME2_http = "http://huggingface.co/MBZUAI/LaMini-T5-61M/resolve/main/pytorch_model.bin" -INVALID_PATH = "https://huggingface.co/invalid_path" SOME_FILE = "https://raw.githubusercontent.com/cloud-py-api/nc_py_api/main/README.md" @@ -19,11 +17,8 @@ async def lifespan(_app: FastAPI): APP, enabled_handler, models_to_fetch={ - INVALID_URL: {}, - MODEL_NAME1: {}, MODEL_NAME2: {}, MODEL_NAME2_http: {}, - INVALID_PATH: {}, SOME_FILE: {}, }, ) @@ -35,10 +30,7 @@ async def lifespan(_app: FastAPI): def enabled_handler(enabled: bool, _nc: NextcloudApp) -> str: if enabled: - try: - assert ex_app.get_model_path(MODEL_NAME1) - except Exception: # noqa - return "model1 not found" + assert ex_app.get_model_path(MODEL_NAME2) assert Path("pytorch_model.bin").is_file() return ""