diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 7575be68c7..d286f14b82 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -90,6 +90,27 @@ jobs: pip install .[testing,tensorflow] - run: pytest -Werror::FutureWarning -sv ./tests/test_keras* + + build_fastai: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.7", "3.9"] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install .[testing,fastai] + + - run: pytest -Werror::FutureWarning -sv ./tests/test_fastai* tests_lfs: runs-on: ubuntu-latest diff --git a/docs/source/package_reference/mixins.mdx b/docs/source/package_reference/mixins.mdx index 3dffc3ce51..b8ad797dec 100644 --- a/docs/source/package_reference/mixins.mdx +++ b/docs/source/package_reference/mixins.mdx @@ -17,4 +17,13 @@ objects, in order to provide simple uploading and downloading functions. 
[[autodoc]] save_pretrained_keras -[[autodoc]] KerasModelHubMixin \ No newline at end of file +[[autodoc]] KerasModelHubMixin + +### Fastai + +[[autodoc]] from_pretrained_fastai + +[[autodoc]] push_to_hub_fastai + + + diff --git a/setup.py b/setup.py index 930c40a4ac..c07ece6e93 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,12 @@ def get_version() -> str: "torch", ] +extras["fastai"] = [ + "toml", + "fastai>=2.4", + "fastcore>=1.3.27", +] + extras["tensorflow"] = ["tensorflow", "pydot", "graphviz"] extras["testing"] = [ diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index e5dc27678e..df0bf7d765 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -31,6 +31,11 @@ TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME, ) +from .fastai_utils import ( + _save_pretrained_fastai, + from_pretrained_fastai, + push_to_hub_fastai, +) from .file_download import cached_download, hf_hub_download, hf_hub_url from .hf_api import ( DatasetSearchArguments, diff --git a/src/huggingface_hub/fastai_utils.py b/src/huggingface_hub/fastai_utils.py new file mode 100644 index 0000000000..eb16c2bdb5 --- /dev/null +++ b/src/huggingface_hub/fastai_utils.py @@ -0,0 +1,438 @@ +import json +import os +from pathlib import Path +from pickle import DEFAULT_PROTOCOL, PicklingError +from typing import Any, Dict, Optional + +from packaging import version + +from huggingface_hub.constants import CONFIG_NAME +from huggingface_hub.file_download import ( + _PY_VERSION, + get_fastai_version, + get_fastcore_version, +) +from huggingface_hub.hf_api import HfApi, HfFolder +from huggingface_hub.repository import Repository +from huggingface_hub.snapshot_download import snapshot_download +from huggingface_hub.utils import logging + + +logger = logging.get_logger(__name__) + + +def _check_fastai_fastcore_versions( + fastai_min_version: Optional[str] = "2.4", + fastcore_min_version: Optional[str] = "1.3.27", +): + """ + Checks that the installed fastai and fastcore 
versions are compatible for pickle serialization. + + Args: + fastai_min_version (`str`, *optional*): + The minimum fastai version supported. + fastcore_min_version (`str`, *optional*): + The minimum fastcore version supported. + + + Raises the following error: + + - [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError) + if the fastai or fastcore libraries are not available or are of an invalid version. + + + """ + + if "N/A" in (get_fastcore_version(), get_fastai_version()): + raise ImportError( + f"fastai>={fastai_min_version} and fastcore>={fastcore_min_version} are" + f" required. Currently using fastai=={get_fastai_version()} and" + f" fastcore=={get_fastcore_version()}." + ) + + current_fastai_version = version.Version(get_fastai_version()) + current_fastcore_version = version.Version(get_fastcore_version()) + + if current_fastai_version < version.Version(fastai_min_version): + raise ImportError( + "`push_to_hub_fastai` and `from_pretrained_fastai` require a" + f" fastai>={fastai_min_version} version, but you are using fastai version" + f" {get_fastai_version()} which is incompatible. Upgrade with `pip install" + " fastai==2.5.6`." + ) + + if current_fastcore_version < version.Version(fastcore_min_version): + raise ImportError( + "`push_to_hub_fastai` and `from_pretrained_fastai` require a" + f" fastcore>={fastcore_min_version} version, but you are using fastcore" + f" version {get_fastcore_version()} which is incompatible. Upgrade with" + " `pip install fastcore==1.3.27`." + ) + + +def _check_fastai_fastcore_pyproject_versions( + storage_folder: str, + fastai_min_version: Optional[str] = "2.4", + fastcore_min_version: Optional[str] = "1.3.27", +): + """ + Checks that the `pyproject.toml` file in the directory `storage_folder` has fastai and fastcore versions + that are compatible with `from_pretrained_fastai` and `push_to_hub_fastai`.
If `pyproject.toml` does not exist + or does not contain versions for fastai and fastcore, then it logs a warning. + + Args: + storage_folder (`str`): + Folder to look for the `pyproject.toml` file. + fastai_min_version (`str`, *optional*): + The minimum fastai version supported. + fastcore_min_version (`str`, *optional*): + The minimum fastcore version supported. + + + Raises the following errors: + + - [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError) + if the `toml` module is not installed. + - [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError) + if the `pyproject.toml` indicates a lower than minimum supported version of fastai or fastcore. + + + """ + + try: + import toml + except ModuleNotFoundError: + raise ImportError( + "`push_to_hub_fastai` and `from_pretrained_fastai` require the toml module." + " Install it with `pip install toml`." + ) + + # Checks that a `pyproject.toml`, with `build-system` and `requires` sections, exists in the repository. If so, get a list of required packages. + if not os.path.isfile(f"{storage_folder}/pyproject.toml"): + logger.warning( + "There is no `pyproject.toml` in the repository that contains the fastai" + " `Learner`. The `pyproject.toml` would allow us to verify that your fastai" + " and fastcore versions are compatible with those of the model you want to" + " load." + ) + return + pyproject_toml = toml.load(f"{storage_folder}/pyproject.toml") + + if "build-system" not in pyproject_toml.keys(): + logger.warning( + "There is no `build-system` section in the pyproject.toml of the repository" + " that contains the fastai `Learner`. The `build-system` would allow us to" + " verify that your fastai and fastcore versions are compatible with those" + " of the model you want to load." 
+ ) + return + build_system_toml = pyproject_toml["build-system"] + + if "requires" not in build_system_toml.keys(): + logger.warning( + "There is no `requires` section in the pyproject.toml of the repository" + " that contains the fastai `Learner`. The `requires` would allow us to" + " verify that your fastai and fastcore versions are compatible with those" + " of the model you want to load." + ) + return + package_versions = build_system_toml["requires"] + + # Extracts the fastai and fastcore versions from `pyproject.toml` if available. + # If the package is specified but not the version (e.g. "fastai" instead of "fastai=2.4"), the default versions are the highest. + fastai_packages = [pck for pck in package_versions if pck.startswith("fastai")] + if len(fastai_packages) == 0: + logger.warning( + "The repository does not have a fastai version specified in the" + " `pyproject.toml`." + ) + # fastai_version is an empty string if not specified + else: + fastai_version = str(fastai_packages[0]).partition("=")[2] + if fastai_version != "" and version.Version(fastai_version) < version.Version( + fastai_min_version + ): + raise ImportError( + "`from_pretrained_fastai` requires" + f" fastai>={fastai_min_version} version but the model to load uses" + f" {fastai_version} which is incompatible." + ) + + fastcore_packages = [pck for pck in package_versions if pck.startswith("fastcore")] + if len(fastcore_packages) == 0: + logger.warning( + "The repository does not have a fastcore version specified in the" + " `pyproject.toml`." + ) + # fastcore_version is an empty string if not specified + else: + fastcore_version = str(fastcore_packages[0]).partition("=")[2] + if fastcore_version != "" and version.Version( + fastcore_version + ) < version.Version(fastcore_min_version): + raise ImportError( + "`from_pretrained_fastai` requires" + f" fastcore>={fastcore_min_version} version but the model to load uses" + f" {fastcore_version} which is incompatible."
+ ) + + +README_TEMPLATE = """--- +tags: +- fastai +--- + +# Amazing! + +🥳 Congratulations on hosting your fastai model on the Hugging Face Hub! + +# Some next steps +1. Fill out this model card with more information (see the template below and the [documentation here](https://huggingface.co/docs/hub/model-repos))! + +2. Create a demo in Gradio or Streamlit using 🤗 Spaces ([documentation here](https://huggingface.co/docs/hub/spaces)). + +3. Join the fastai community on the [Fastai Discord](https://discord.com/invite/YKrxeNn)! + +Greetings fellow fastlearner 🤝! Don't forget to delete this content from your model card. + + +--- + + +# Model card + +## Model description +More information needed + +## Intended uses & limitations +More information needed + +## Training and evaluation data +More information needed +""" + +PYPROJECT_TEMPLATE = f"""[build-system] +requires = ["setuptools>=40.8.0", "wheel", "python={_PY_VERSION}", "fastai={get_fastai_version()}", "fastcore={get_fastcore_version()}"] +build-backend = "setuptools.build_meta:__legacy__" +""" + + +def _create_model_card(repo_dir: Path): + """ + Creates a model card for the repository. + + Args: + repo_dir (`Path`): + Directory where model card is created. + """ + readme_path = repo_dir / "README.md" + + if not readme_path.exists(): + with readme_path.open("w", encoding="utf-8") as f: + f.write(README_TEMPLATE) + + +def _create_model_pyproject(repo_dir: Path): + """ + Creates a `pyproject.toml` for the repository. + + Args: + repo_dir (`Path`): + Directory where `pyproject.toml` is created. + """ + pyproject_path = repo_dir / "pyproject.toml" + + if not pyproject_path.exists(): + with pyproject_path.open("w", encoding="utf-8") as f: + f.write(PYPROJECT_TEMPLATE) + + +def _save_pretrained_fastai( + learner, + save_directory: str, + config: Optional[Dict[str, Any]] = None, +): + """ + Saves a fastai learner to `save_directory` in pickle format using the default pickle protocol for the version of python used. 
+ + Args: + learner (`Learner`): + The `fastai.Learner` you'd like to save. + save_directory (`str`): + Specific directory in which you want to save the fastai learner. + config (`dict`, *optional*): + Configuration object. Will be uploaded as a .json file. Example: 'https://huggingface.co/espejelomar/fastai-pet-breeds-classification/blob/main/config.json'. + + + + Raises the following error: + + - [`RuntimeError`](https://docs.python.org/3/library/exceptions.html#RuntimeError) + if the config file provided is not a dictionary. + + + """ + _check_fastai_fastcore_versions() + + os.makedirs(save_directory, exist_ok=True) + + # If the user provides a config, check that it is a dict and save it as `CONFIG_NAME` in `save_directory`. + if config is not None: + if not isinstance(config, dict): + raise RuntimeError( + f"Provided config should be a dict. Got: '{type(config)}'" + ) + path = os.path.join(save_directory, CONFIG_NAME) + with open(path, "w") as f: + json.dump(config, f) + + _create_model_card(Path(save_directory)) + _create_model_pyproject(Path(save_directory)) + + # learner.export saves the model in `self.path`. + learner.path = Path(save_directory) + os.makedirs(save_directory, exist_ok=True) + try: + learner.export( + fname="model.pkl", + pickle_protocol=DEFAULT_PROTOCOL, + ) + except PicklingError: + raise PicklingError( + "You are using a lambda function, i.e., an anonymous function. `pickle`" + " cannot pickle function objects and requires that all functions have" + " names. One possible solution is to name the function." + ) + + +def from_pretrained_fastai( + repo_id: str, + revision: Optional[str] = None, +): + """ + Load pretrained fastai model from the Hub or from a local directory. + + Args: + repo_id (`str`): + The location where the pickled fastai.Learner is. It can be either of the two: + - Hosted on the Hugging Face Hub. E.g.: 'espejelomar/fastai-pet-breeds-classification' or 'distilgpt2'.
+ You can add a `revision` by appending `@` at the end of `repo_id`. E.g.: `dbmdz/bert-base-german-cased@main`. + Revision is the specific model version to use. Since we use a git-based system for storing models and other + artifacts on the Hugging Face Hub, it can be a branch name, a tag name, or a commit id. + - Hosted locally. `repo_id` would be a directory containing the pickle and a pyproject.toml + indicating the fastai and fastcore versions used to build the `fastai.Learner`. E.g.: `./my_model_directory/`. + revision (`str`, *optional*): + Revision at which the repo's files are downloaded. See documentation of `snapshot_download`. + + Returns: + The `fastai.Learner` model in the `repo_id` repo. + """ + _check_fastai_fastcore_versions() + + # Load the `repo_id` repo. + # `snapshot_download` returns the folder where the model was stored. + # `cache_dir` will be the default '/root/.cache/huggingface/hub' + if not os.path.isdir(repo_id): + storage_folder = snapshot_download( + repo_id=repo_id, + revision=revision, + library_name="fastai", + library_version=get_fastai_version(), + ) + else: + storage_folder = repo_id + + _check_fastai_fastcore_pyproject_versions(storage_folder) + + from fastai.learner import load_learner + + return load_learner(os.path.join(storage_folder, "model.pkl")) + + +def push_to_hub_fastai( + learner, + repo_id: str, + commit_message: Optional[str] = "Add model", + private: Optional[bool] = None, + token: Optional[str] = None, + config: Optional[dict] = None, + **kwargs, +): + """ + Upload learner checkpoint files to the Hub while synchronizing a local clone of the repo in + :obj:`repo_id`. + + Args: + learner (`Learner`): + The `fastai.Learner` you'd like to push to the Hub. + repo_id (`str`): + The repository id for your model in Hub in the format of "namespace/repo_name". The namespace can be your individual account or an organization to which you have write access (for example, 'stanfordnlp/stanza-de').
+ commit_message (`str`, *optional*): + Message to commit while pushing. Will default to :obj:`"Add model"`. + private (`bool`, *optional*): + Whether or not the repository created should be private. + token (`str`, *optional*): + The Hugging Face account token to use as HTTP bearer authorization for remote files. If :obj:`None`, the locally saved token (e.g. from `huggingface-cli login`) is used. + config (`dict`, *optional*): + Configuration object to be saved alongside the model weights. + + Keyword Args: + api_endpoint (`str`, *optional*): + The API endpoint to use when pushing the model to the hub. + git_user (`str`, *optional*): + Will override the ``git config user.name`` for committing and pushing files to the hub. + git_email (`str`, *optional*): + Will override the ``git config user.email`` for committing and pushing files to the hub. + + Returns: + The url of the commit of your model in the given repository. + + + + Raises the following error: + + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if the user is not logged in to the Hugging Face Hub. + + + """ + + _check_fastai_fastcore_versions() + + api_endpoint: str = kwargs.get("api_endpoint", None) + git_user: str = kwargs.get("git_user", None) + git_email: str = kwargs.get("git_email", None) + + if token is None: + token = HfFolder.get_token() + + if token is None: + raise ValueError( + "You must login to the Hugging Face Hub. There are two options: " + "(1) Type `huggingface-cli login` in your terminal and enter your token. " + "(2) Enter your token in the `token` argument. " + "Your token is available in the Settings of your Hugging Face account. " + ) + + # Create repo using `HfApi()`. + repo_url = HfApi(endpoint=api_endpoint).create_repo( + repo_id, + token=token, + private=private, + repo_type=None, + exist_ok=True, + ) + + # If repository exists in the Hugging Face Hub then clone it locally in `repo_id`.
+ repo = Repository( + repo_id, + clone_from=repo_url, + use_auth_token=token, + git_user=git_user, + git_email=git_email, + ) + repo.git_pull(rebase=True) + + _save_pretrained_fastai(learner, repo_id, config=config) + + return repo.push_to_hub(commit_message=commit_message) diff --git a/src/huggingface_hub/file_download.py b/src/huggingface_hub/file_download.py index f580f69a82..6344e2b4fb 100644 --- a/src/huggingface_hub/file_download.py +++ b/src/huggingface_hub/file_download.py @@ -97,6 +97,22 @@ def is_graphviz_available(): except importlib_metadata.PackageNotFoundError: pass +_fastai_version = "N/A" +_fastai_available = False +try: + _fastai_version: str = importlib_metadata.version("fastai") + _fastai_available = True +except importlib_metadata.PackageNotFoundError: + pass + +_fastcore_version = "N/A" +_fastcore_available = False +try: + _fastcore_version: str = importlib_metadata.version("fastcore") + _fastcore_available = True +except importlib_metadata.PackageNotFoundError: + pass + def is_torch_available(): return _torch_available @@ -106,6 +122,22 @@ def is_tf_available(): return _tf_available +def is_fastai_available(): + return _fastai_available + + +def get_fastai_version(): + return _fastai_version + + +def is_fastcore_available(): + return _fastcore_available + + +def get_fastcore_version(): + return _fastcore_version + + @_deprecate_positional_args def hf_hub_url( repo_id: str, @@ -275,6 +307,10 @@ def http_user_agent( ua += f"; torch/{_torch_version}" if is_tf_available(): ua += f"; tensorflow/{_tf_version}" + if is_fastai_available(): + ua += f"; fastai/{_fastai_version}" + if is_fastcore_available(): + ua += f"; fastcore/{_fastcore_version}" if isinstance(user_agent, dict): ua += "; " + "; ".join(f"{k}/{v}" for k, v in user_agent.items()) elif isinstance(user_agent, str): diff --git a/tests/test_fastai_integration.py b/tests/test_fastai_integration.py new file mode 100644 index 0000000000..32b2f6b407 --- /dev/null +++ 
b/tests/test_fastai_integration.py @@ -0,0 +1,123 @@ +import os +import shutil +import time +import uuid +from unittest import TestCase, skip + +from huggingface_hub import HfApi +from huggingface_hub.fastai_utils import ( + _save_pretrained_fastai, + from_pretrained_fastai, + push_to_hub_fastai, +) +from huggingface_hub.file_download import ( + is_fastai_available, + is_fastcore_available, + is_torch_available, +) + +from .testing_constants import ENDPOINT_STAGING, TOKEN, USER +from .testing_utils import set_write_permission_and_retry + + +def repo_name(id=uuid.uuid4().hex[:6]): + return "fastai-repo-{0}-{1}".format(id, int(time.time() * 10e3)) + + +WORKING_REPO_SUBDIR = f"fixtures/working_repo_{__name__.split('.')[-1]}" +WORKING_REPO_DIR = os.path.join( + os.path.dirname(os.path.abspath(__file__)), WORKING_REPO_SUBDIR +) + +if is_fastai_available(): + from fastai.data.block import DataBlock + from fastai.test_utils import synth_learner + +if is_torch_available(): + import torch + + +def require_fastai_fastcore(test_case): + """ + Decorator marking a test that requires fastai and fastcore. + These tests are skipped when fastai and fastcore are not installed. + """ + if not is_fastai_available(): + return skip("Test requires fastai")(test_case) + elif not is_fastcore_available(): + return skip("Test requires fastcore")(test_case) + else: + return test_case + + +def fake_dataloaders(a=2, b=3, bs=16, n=10): + def get_data(n): + x = torch.randn(bs * n, 1) + return torch.cat((x, a * x + b + 0.1 * torch.randn(bs * n, 1)), 1) + + ds = get_data(n) + dblock = DataBlock() + return dblock.dataloaders(ds) + + +if is_fastai_available(): + dummy_model = synth_learner(data=fake_dataloaders()) + dummy_config = dict(test="test_0") +else: + dummy_model = None + dummy_config = None + + +@require_fastai_fastcore +class TestFastaiUtils(TestCase): + @classmethod + def setUpClass(cls): + """ + Share this valid token in all tests below. 
+ """ + cls._api = HfApi(endpoint=ENDPOINT_STAGING) + cls._token = TOKEN + cls._api.set_access_token(TOKEN) + + def tearDown(self) -> None: + try: + shutil.rmtree(WORKING_REPO_DIR, onerror=set_write_permission_and_retry) + except FileNotFoundError: + pass + + def test_save_pretrained_without_config(self): + REPO_NAME = repo_name("save") + _save_pretrained_fastai(dummy_model, f"{WORKING_REPO_DIR}/{REPO_NAME}") + files = os.listdir(f"{WORKING_REPO_DIR}/{REPO_NAME}") + self.assertTrue("model.pkl" in files) + self.assertTrue("pyproject.toml" in files) + self.assertTrue("README.md" in files) + self.assertEqual(len(files), 3) + + def test_save_pretrained_with_config(self): + REPO_NAME = repo_name("save") + _save_pretrained_fastai( + dummy_model, f"{WORKING_REPO_DIR}/{REPO_NAME}", config=dummy_config + ) + files = os.listdir(f"{WORKING_REPO_DIR}/{REPO_NAME}") + self.assertTrue("config.json" in files) + self.assertEqual(len(files), 4) + + def test_push_to_hub_and_from_pretrained_fastai(self): + REPO_NAME = repo_name("push_to_hub") + push_to_hub_fastai( + learner=dummy_model, + repo_id=f"{USER}/{REPO_NAME}", + token=self._token, + config=dummy_config, + ) + model_info = self._api.model_info( + f"{USER}/{REPO_NAME}", + ) + self.assertEqual(model_info.modelId, f"{USER}/{REPO_NAME}") + + loaded_model = from_pretrained_fastai(f"{USER}/{REPO_NAME}") + self.assertEqual( + dummy_model.show_training_loop(), loaded_model.show_training_loop() + ) + self._api.delete_repo(repo_id=f"{REPO_NAME}", token=self._token)