diff --git a/.github/workflows/docker-build-action.yaml b/.github/workflows/docker-build-action.yaml
index 62cba961..fe644056 100644
--- a/.github/workflows/docker-build-action.yaml
+++ b/.github/workflows/docker-build-action.yaml
@@ -63,6 +63,7 @@ jobs:
           push: true
           context: ${{ inputs.context }}
           build-args: ${{ inputs.build_args }}
+          target: base
           file: ${{ inputs.context }}/${{ inputs.dockerfile }}
           tags: ${{ inputs.repository }}/${{ inputs.image }}:sha-${{ env.GITHUB_SHA_SHORT }},${{ inputs.repository }}/${{ inputs.image }}:latest

diff --git a/README.md b/README.md
index f2f66b40..f3056a89 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert HF_MODEL_DIR=tmp2 HF_TASK

 ### Container

-1. build the preferred container for either CPU or GPU for PyTorch or TensorFlow.
+1. build the preferred container for either CPU or GPU for PyTorch.

 _cpu images_
 ```bash
@@ -58,6 +58,57 @@ curl --request POST \
 }'
 ```

+### Vertex AI Support
+
+The Hugging Face Inference Toolkit is also supported on Vertex AI, based on [Custom container requirements for prediction](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements). [Environment variables set by Vertex AI](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables) are automatically detected and used by the toolkit.
+
+#### Local run with HF_MODEL_ID and HF_TASK
+
+Start the Hugging Face Inference Toolkit with the following environment variables.
+
+```bash
+mkdir tmp2/
+AIP_MODE=PREDICTION AIP_PORT=8080 AIP_PREDICT_ROUTE=/pred AIP_HEALTH_ROUTE=/h HF_MODEL_DIR=tmp2 HF_MODEL_ID=distilbert/distilbert-base-uncased-finetuned-sst-2-english HF_TASK=text-classification uvicorn src.huggingface_inference_toolkit.webservice_starlette:app --port 8080
+```
+
+Send request
+
+```bash
+curl --request POST \
+  --url http://localhost:8080/pred \
+  --header 'Content-Type: application/json' \
+  --data '{
+  "instances": ["I love this product", "I hate this product"],
+  "parameters": { "top_k": 2 }
+}'
+```
+
+#### Container run with HF_MODEL_ID and HF_TASK
+
+1. build the preferred container for either CPU or GPU for PyTorch.
+
+```bash
+docker build -t vertex -f dockerfiles/pytorch/Dockerfile -t vertex-test-pytorch:gpu .
+```
+
+2. Run the container and either pass environment variables pointing to the Hub model you want to use, or mount a volume into the container where your model is stored.
+
+```bash
+docker run -ti -p 8080:8080 -e AIP_MODE=PREDICTION -e AIP_HTTP_PORT=8080 -e AIP_PREDICT_ROUTE=/pred -e AIP_HEALTH_ROUTE=/h -e HF_MODEL_ID=distilbert/distilbert-base-uncased-finetuned-sst-2-english -e HF_TASK=text-classification vertex-test-pytorch:gpu
+```
+
+3. Send request
+
+```bash
+curl --request POST \
+  --url http://localhost:8080/pred \
+  --header 'Content-Type: application/json' \
+  --data '{
+  "instances": ["I love this product", "I hate this product"],
+  "parameters": { "top_k": 2 }
+}'
+```
+

 ---

@@ -176,6 +227,7 @@ Below you'll find a list of supported and tested transformers and sentence trans
 ## ⚙ Supported Frontend

 - [x] Starlette (HF Endpoints)
+- [x] Starlette (Vertex AI)
 - [ ] Starlette (Azure ML)
 - [ ] Starlette (SageMaker)

diff --git a/dockerfiles/pytorch/Dockerfile b/dockerfiles/pytorch/Dockerfile
index 8e4c4d35..c554ce59 100644
--- a/dockerfiles/pytorch/Dockerfile
+++ b/dockerfiles/pytorch/Dockerfile
@@ -1,6 +1,6 @@
 ARG BASE_IMAGE=nvidia/cuda:12.1.0-devel-ubuntu22.04

-FROM $BASE_IMAGE
+FROM $BASE_IMAGE as base
 SHELL ["/bin/bash", "-c"]

 LABEL maintainer="Hugging Face"
@@ -45,4 +45,10 @@ COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starle
 # copy entrypoint and change permissions
 COPY --chmod=0755 scripts/entrypoint.sh entrypoint.sh

-ENTRYPOINT ["bash", "-c", "./entrypoint.sh"]
\ No newline at end of file
+ENTRYPOINT ["bash", "-c", "./entrypoint.sh"]
+
+
+FROM base as vertex
+
+# Install the required packages for Vertex AI
+RUN pip install --no-cache-dir google-cloud-storage
diff --git a/dockerfiles/tensorflow/cpu/Dockerfile b/dockerfiles/tensorflow/cpu/Dockerfile
deleted file mode 100644
index d16010bb..00000000
--- a/dockerfiles/tensorflow/cpu/Dockerfile
+++ /dev/null
@@ -1,53 +0,0 @@
-FROM ubuntu:22.04
-
-LABEL maintainer="Hugging Face"
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update \
-    && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \
-    && apt-get install -y \
-    bzip2 \
-    curl \
-    git \
-    git-lfs \
-    tar \
-    gcc \
-    g++ \
-    cmake \
-    # audio
-    libsndfile1-dev \
-    ffmpeg \
-    && apt-get clean autoremove --yes \
-    && rm -rf /var/lib/{apt,dpkg,cache,log}
-
-# install micromamba
-ENV MAMBA_ROOT_PREFIX=/opt/conda
-ENV PATH=/opt/conda/bin:$PATH
-RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
-    && touch /root/.bashrc \
-    && ./bin/micromamba shell init -s bash -p /opt/conda \
-    && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
-
-WORKDIR /app
-
-# install base python dependencies
-COPY dockerfiles/tensorflow/cpu/environment.yaml /app/environment.yaml
-RUN micromamba install -y -n base -f environment.yaml \
-    && rm environment.yaml \
-    && micromamba clean --all --yes
-
-# install huggingface inference toolkit
-COPY requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
-
-# copy application
-COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
-COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
-
-# copy entrypoint and change permissions
-COPY scripts/entrypoint.sh entrypoint.sh
-RUN chmod +x entrypoint.sh
-
-# run app
-ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
\ No newline at end of file
diff --git a/dockerfiles/tensorflow/cpu/environment.yaml b/dockerfiles/tensorflow/cpu/environment.yaml
deleted file mode 100644
index a370380c..00000000
--- a/dockerfiles/tensorflow/cpu/environment.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-name: base
-channels:
-- conda-forge
-dependencies:
-- python=3.9.13
-- tensorflow=2.9.1=*cpu*py39*
-- pip:
-  - transformers[sklearn,sentencepiece,audio,vision]==4.27.2
\ No newline at end of file
diff --git a/dockerfiles/tensorflow/gpu/Dockerfile b/dockerfiles/tensorflow/gpu/Dockerfile
deleted file mode 100644
index c42a33c0..00000000
--- a/dockerfiles/tensorflow/gpu/Dockerfile
+++ /dev/null
@@ -1,59 +0,0 @@
-FROM nvidia/cuda:11.2.2-base-ubuntu20.04
-
-LABEL maintainer="Hugging Face"
-
-ENV DEBIAN_FRONTEND=noninteractive
-ENV CONDA_OVERRIDE_CUDA="11.2"
-
-RUN apt-get update \
-    && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \
-    && apt-get install -y \
-    bzip2 \
-    curl \
-    git \
-    git-lfs \
-    tar \
-    gcc \
-    g++ \
-    cmake \
-    # audio
-    libsndfile1-dev \
-    ffmpeg \
-    && apt-get clean autoremove --yes \
-    && rm -rf /var/lib/{apt,dpkg,cache,log}
-
-# install micromamba
-ENV MAMBA_ROOT_PREFIX=/opt/conda
-ENV PATH=/opt/conda/bin:$PATH
-ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}"
-
-RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
-    && touch /root/.bashrc \
-    && ./bin/micromamba shell init -s bash -p /opt/conda \
-    && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
-
-WORKDIR /app
-
-# install base python dependencies
-COPY dockerfiles/tensorflow/gpu/environment.yaml /app/environment.yaml
-RUN micromamba install -y -n base -f environment.yaml \
-    && rm environment.yaml \
-    && micromamba clean --all --yes
-
-# install huggingface inference toolkit
-COPY requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
-
-# copy tests
-COPY . /tmp/hf-inference-test
-
-# copy application
-COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
-COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
-
-# copy entrypoint and change permissions
-COPY scripts/entrypoint.sh entrypoint.sh
-RUN chmod +x entrypoint.sh
-
-# run app
-ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
\ No newline at end of file
diff --git a/dockerfiles/tensorflow/gpu/environment.yaml b/dockerfiles/tensorflow/gpu/environment.yaml
deleted file mode 100644
index 1d886795..00000000
--- a/dockerfiles/tensorflow/gpu/environment.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: base
-channels:
-- conda-forge
-dependencies:
-- python=3.9.13
-- nvidia::cudatoolkit=11.7
-- tensorflow=2.9.1=*cuda112*py39*
-- pip:
-  - transformers[sklearn,sentencepiece,audio,vision]==4.27.2
\ No newline at end of file
diff --git a/makefile b/makefile
index a9490428..3502d83e 100644
--- a/makefile
+++ b/makefile
@@ -26,5 +26,11 @@ inference-pytorch-gpu:
 inference-pytorch-cpu:
 	docker build --build-arg="BASE_IMAGE=ubuntu:22.04" -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:cpu .

+vertex-pytorch-gpu:
+	docker build -t vertex -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:gpu .
+
+vertex-pytorch-cpu:
+	docker build -t vertex --build-arg="BASE_IMAGE=ubuntu:22.04" -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:cpu .
+
 stop-all:
 	docker stop $$(docker ps -a -q) && docker container prune --force
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index e69de29b..00000000
diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
index 8544a63c..3e62536b 100644
--- a/scripts/entrypoint.sh
+++ b/scripts/entrypoint.sh
@@ -1,13 +1,21 @@
-# /bin/bash
+#!/bin/bash

-# check if HF_MODEL_DIR is set and if not skip installing custom dependencies
+# Define the default port
+PORT=5000
+
+# Check if AIP_MODE is set and adjust the port for Vertex AI
+if [[ ! -z "${AIP_MODE}" ]]; then
+  PORT=${AIP_HTTP_PORT}
+fi
+
+# Check if HF_MODEL_DIR is set and if not skip installing custom dependencies
 if [[ ! -z "${HF_MODEL_DIR}" ]]; then
-  # check if requirements.txt exists and if so install dependencies
+  # Check if requirements.txt exists and if so install dependencies
   if [ -f "${HF_MODEL_DIR}/requirements.txt" ]; then
     echo "Installing custom dependencies from ${HF_MODEL_DIR}/requirements.txt"
     pip install -r ${HF_MODEL_DIR}/requirements.txt --no-cache-dir;
   fi
 fi

-# start the server
-uvicorn webservice_starlette:app --host 0.0.0.0 --port 5000
\ No newline at end of file
+# Start the server
+uvicorn webservice_starlette:app --host 0.0.0.0 --port ${PORT}
diff --git a/setup.py b/setup.py
index 5e99df02..deffb557 100644
--- a/setup.py
+++ b/setup.py
@@ -1,12 +1,10 @@
 from __future__ import absolute_import

-from datetime import date
 from setuptools import find_packages, setup

 # We don't declare our dependency on transformers here because we build with
 # different packages for different variants
-VERSION = "0.3.0"
-
+VERSION = "0.4.0"

 # Ubuntu packages
 # libsndfile1-dev: torchaudio requires the development version of the libsndfile package which can be installed via a system package manager. On Ubuntu it can be installed as follows: apt install libsndfile1-dev
@@ -14,11 +12,7 @@
 # libavcodec-extra : libavcodec-extra inculdes additional codecs for ffmpeg

 install_requires = [
-    "wheel==0.42.0",
-    "setuptools==69.1.0",
-    "cmake==3.28.3",
-    "transformers[sklearn,sentencepiece, audio, vision]==4.38.2",
-    "huggingface_hub==0.20.3",
+    "transformers[sklearn,sentencepiece, audio,vision]==4.41.1",
     "orjson",
     # vision
     "Pillow",
@@ -31,15 +25,14 @@
     "starlette",
     "uvicorn",
     "pandas",
-    "peft==0.9.0"
+    "peft==0.11.1"
 ]

 extras = {}

-extras["st"] = ["sentence_transformers==2.4.0"]
+extras["st"] = ["sentence_transformers==2.7.0"]
 extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"]
-extras["torch"] = ["torch==2.2.0", "torchvision", "torchaudio"]
-extras["tensorflow"] = ["tensorflow"]
+extras["torch"] = ["torch==2.2.2", "torchvision", "torchaudio"]
 extras["test"] = [
     "pytest==7.2.1",
     "pytest-xdist",
diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py
index 08368326..0a8c93b8 100644
--- a/src/huggingface_inference_toolkit/handler.py
+++ b/src/huggingface_inference_toolkit/handler.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from pathlib import Path
 from typing import Optional, Union

@@ -40,15 +41,52 @@ def __call__(self, data):
         return prediction


+class VertexAIHandler(HuggingFaceHandler):
+    """
+    A Default Vertex AI Hugging Face Inference Handler which abstracts the
+    Vertex AI specific logic for inference.
+    """
+    def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"):
+        super().__init__(model_dir, task, framework)
+
+    def __call__(self, data):
+        """
+        Handles an inference request with input data and makes a prediction.
+        Args:
+            :data: (obj): the raw request body data.
+            :return: prediction output
+        """
+        if "instances" not in data:
+            raise ValueError("The request body must contain a key 'instances' with a list of instances.")
+        parameters = data.pop("parameters", None)
+
+        predictions = []
+        # iterate over all instances and make predictions
+        for inputs in data["instances"]:
+            payload = {"inputs": inputs, "parameters": parameters}
+            predictions.append(super().__call__(payload))
+
+        # return predictions
+        return {"predictions": predictions}
+
 def get_inference_handler_either_custom_or_default_handler(
     model_dir: Path, task: Optional[str] = None
 ):
     """
-    get inference handler either custom or default Handler
+    Returns the appropriate inference handler based on the given model directory and task.
+
+    Args:
+        model_dir (Path): The directory path where the model is stored.
+        task (Optional[str]): The task for which the inference handler is required. Defaults to None.
+
+    Returns:
+        InferenceHandler: The appropriate inference handler based on the given model directory and task.
     """
     custom_pipeline = check_and_register_custom_pipeline_from_directory(model_dir)

     if custom_pipeline:
         return custom_pipeline
+    elif os.environ.get("AIP_MODE", None) == "PREDICTION":
+        return VertexAIHandler(model_dir=model_dir, task=task)
     else:
         return HuggingFaceHandler(model_dir=model_dir, task=task)
diff --git a/src/huggingface_inference_toolkit/vertex_ai_utils.py b/src/huggingface_inference_toolkit/vertex_ai_utils.py
new file mode 100644
index 00000000..19dd41e2
--- /dev/null
+++ b/src/huggingface_inference_toolkit/vertex_ai_utils.py
@@ -0,0 +1,46 @@
+import logging
+import re
+from pathlib import Path
+from typing import Union
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
+
+
+
+_logger = logging.getLogger(__name__)
+
+
+GCS_URI_PREFIX = "gs://"
+
+
+# copied from https://github.com/googleapis/python-aiplatform/blob/94d838d8cfe1599bc2d706e66080c05108821986/google/cloud/aiplatform/utils/prediction_utils.py#L121
+def _load_repository_from_gcs(artifact_uri: str, target_dir: Union[str, Path] = "/tmp"):
+    """
+    Load files from GCS path to target_dir
+    """
+    from google.cloud import storage
+    _logger.info(f"Loading model artifacts from {artifact_uri} to {target_dir}")
+    target_dir = Path(target_dir)
+
+    if artifact_uri.startswith(GCS_URI_PREFIX):
+        matches = re.match(f"{GCS_URI_PREFIX}(.*?)/(.*)", artifact_uri)
+        bucket_name, prefix = matches.groups()
+
+        gcs_client = storage.Client()
+        blobs = gcs_client.list_blobs(bucket_name, prefix=prefix)
+        for blob in blobs:
+            name_without_prefix = blob.name[len(prefix) :]
+            name_without_prefix = (
+                name_without_prefix[1:]
+                if name_without_prefix.startswith("/")
+                else name_without_prefix
+            )
+            file_split = name_without_prefix.split("/")
+            directory = target_dir.joinpath(*file_split[0:-1])
+            directory.mkdir(parents=True, exist_ok=True)
+            if name_without_prefix and not name_without_prefix.endswith("/"):
+                blob.download_to_filename(str(target_dir / name_without_prefix))
+
+    return str(target_dir.absolute())
+
diff --git a/src/huggingface_inference_toolkit/webservice_robyn.py b/src/huggingface_inference_toolkit/webservice_robyn.py
deleted file mode 100644
index 5aeaf605..00000000
--- a/src/huggingface_inference_toolkit/webservice_robyn.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import logging
-import os
-
-from robyn import Robyn
-
-from huggingface_inference_toolkit.serialization.base import ContentType
-from huggingface_inference_toolkit.serialization.json_utils import Jsoner
-
-logger = logging.getLogger(__name__)
-logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
-
-
-app = Robyn(__file__)
-
-HF_MODEL_DIR = os.environ.get("HF_MODEL_DIR", "/opt/huggingface/model")
-HF_TASK = os.environ.get("HF_TASK", None)
-
-# @app.startup_handler
-# async def startup_event():
-#     global inference_handler
-
-#     if empty_directory_or_not_hf_remote_id is None or task is None:
-#         raise ValueError(
-#             f"""Can't initialize model.
-#             Please set correct model id and task.
-#             Provided values are model_id:
-#             {model_id_or_path} and task:{task}"""
-#         )
-
-#     logger.info(f"Initializing model with model_id:{model_id_or_path} and task:{task}")
-#     # create inference handler
-#     inference_handler = HuggingFaceHandler(HF_MODEL_ID)
-#     logger.info(f"Model initialized successfully on device: {inference_handler.model.device}")
-#     return inference_handler
-
-
-@app.get("/health")
-async def health():
-    return "OK"
-
-
-@app.post("/predict")
-async def predict(request):
-    try:
-        logger.info(request)
-        content_type = request.headers.get("Content-Type", None)
-        body = ContentType.get_deserializer(content_type).deserialize(request["body"])
-        logger.info(body)
-
-        # pred = inference_handler(body["inputs"])
-        return Jsoner.serialize(body)
-    except Exception as e:
-        logger.error(e)
-        return Jsoner.serialize({"error": str(e)})
-
-
-app.start(port=5000)
diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py
index 8bc68b2e..862560dc 100644
--- a/src/huggingface_inference_toolkit/webservice_starlette.py
+++ b/src/huggingface_inference_toolkit/webservice_starlette.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from pathlib import Path
 from time import perf_counter

@@ -20,6 +21,7 @@
 from huggingface_inference_toolkit.serialization.base import ContentType
 from huggingface_inference_toolkit.serialization.json_utils import Jsoner
 from huggingface_inference_toolkit.utils import _load_repository_from_hf, convert_params_to_int_or_bool
+from huggingface_inference_toolkit.vertex_ai_utils import _load_repository_from_gcs


 def config_logging(level=logging.INFO):
@@ -35,10 +37,11 @@ def config_logging(level=logging.INFO):
 logger = logging.getLogger(__name__)


-async def some_startup_task():
+async def prepare_model_artifacts():
     global inference_handler
     # 1. check if model artifacts available in HF_MODEL_DIR
     if len(list(Path(HF_MODEL_DIR).glob("**/*"))) <= 0:
+        # 2. if not available, try to load from HF_MODEL_ID
         if HF_MODEL_ID is not None:
             _load_repository_from_hf(
                 repository_id=HF_MODEL_ID,
@@ -47,6 +50,11 @@ async def prepare_model_artifacts():
                 revision=HF_REVISION,
                 hf_hub_token=HF_HUB_TOKEN,
             )
+        # 3. check if running in a Vertex AI environment and load the model from GCS
+        # if artifactUri was not set on Model creation, AIP_STORAGE_URI is an empty string
+        elif len(os.environ.get("AIP_STORAGE_URI", '')) > 0:
+            _load_repository_from_gcs(os.environ["AIP_STORAGE_URI"], target_dir=HF_MODEL_DIR)
+        # 4. if not available, raise error
         else:
             raise ValueError(
                 f"""Can't initialize model.
@@ -72,7 +80,7 @@ async def predict(request):
         # try to deserialize payload
         deserialized_body = ContentType.get_deserializer(content_type).deserialize(await request.body())
         # checks if input schema is correct
-        if "inputs" not in deserialized_body:
+        if "inputs" not in deserialized_body and "instances" not in deserialized_body:
             raise ValueError(f"Body needs to provide a inputs key, recieved: {orjson.dumps(deserialized_body)}")

         # check for query parameter and add them to the body
@@ -97,14 +105,31 @@ async def predict(request):
         logger.error(e)
         return Response(Jsoner.serialize({"error": str(e)}), status_code=400, media_type="application/json")

-
-app = Starlette(
-    debug=True,
-    routes=[
-        Route("/", health, methods=["GET"]),
-        Route("/health", health, methods=["GET"]),
-        Route("/", predict, methods=["POST"]),
-        Route("/predict", predict, methods=["POST"]),
-    ],
-    on_startup=[some_startup_task],
+# Create app based on which cloud environment is used
+if os.getenv("AIP_MODE", None) == "PREDICTION":
+    logger.info("Running in Vertex AI environment")
+    # extract routes from environment variables
+    _predict_route = os.getenv("AIP_PREDICT_ROUTE", None)
+    _health_route = os.getenv("AIP_HEALTH_ROUTE", None)
+    if _predict_route is None or _health_route is None:
+        raise ValueError("AIP_PREDICT_ROUTE and AIP_HEALTH_ROUTE need to be set in Vertex AI environment")
+
+    app = Starlette(
+        debug=False,
+        routes=[
+            Route(_health_route, health, methods=["GET"]),
+            Route(_predict_route, predict, methods=["POST"]),
+        ],
+        on_startup=[prepare_model_artifacts],
+    )
+else:
+    app = Starlette(
+        debug=False,
+        routes=[
+            Route("/", health, methods=["GET"]),
+            Route("/health", health, methods=["GET"]),
+            Route("/", predict, methods=["POST"]),
+            Route("/predict", predict, methods=["POST"]),
+        ],
+        on_startup=[prepare_model_artifacts],
 )
diff --git a/tests/unit/test_diffusers.py b/tests/unit/test_diffusers.py
index 0f2890a8..4384cd4e 100644
--- a/tests/unit/test_diffusers.py
+++ b/tests/unit/test_diffusers.py
@@ -15,7 +15,7 @@ def test_get_diffusers_pipeline():
     with tempfile.TemporaryDirectory() as tmpdirname:
         storage_dir = _load_repository_from_hf(
-            "hf-internal-testing/tiny-stable-diffusion-torch",
+            "echarlaix/tiny-random-stable-diffusion-xl",
             tmpdirname,
             framework="pytorch"
         )
@@ -28,7 +28,7 @@ def test_pipe_on_gpu():
     with tempfile.TemporaryDirectory() as tmpdirname:
         storage_dir = _load_repository_from_hf(
-            "hf-internal-testing/tiny-stable-diffusion-torch",
+            "echarlaix/tiny-random-stable-diffusion-xl",
             tmpdirname,
             framework="pytorch"
         )
@@ -44,7 +44,7 @@ def test_text_to_image_task():
     with tempfile.TemporaryDirectory() as tmpdirname:
         storage_dir = _load_repository_from_hf(
-            "hf-internal-testing/tiny-stable-diffusion-torch",
+            "echarlaix/tiny-random-stable-diffusion-xl",
             tmpdirname,
             framework="pytorch"
         )
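Below is a minimal, self-contained sketch of the request mapping that the new `VertexAIHandler` in the patch performs: a Vertex AI body of the form `{"instances": [...], "parameters": {...}}` is unpacked into one `{"inputs": ..., "parameters": ...}` payload per instance, and the results are wrapped in a `predictions` key. The `vertex_predict` and `run_pipeline` names are illustrative stand-ins, not part of the toolkit.

```python
from typing import Any, Callable, Dict, List


def vertex_predict(data: Dict[str, Any], run_pipeline: Callable[[Dict[str, Any]], Any]) -> Dict[str, List[Any]]:
    """Unpack a Vertex AI request body and collect per-instance predictions."""
    if "instances" not in data:
        raise ValueError("The request body must contain a key 'instances' with a list of instances.")
    parameters = data.get("parameters")

    predictions = []
    for inputs in data["instances"]:
        # each instance becomes a regular toolkit payload
        predictions.append(run_pipeline({"inputs": inputs, "parameters": parameters}))

    # Vertex AI expects the response wrapped in a "predictions" key
    return {"predictions": predictions}


if __name__ == "__main__":
    # stand-in pipeline that simply echoes its input
    print(vertex_predict(
        {"instances": ["I love this product", "I hate this product"], "parameters": {"top_k": 2}},
        run_pipeline=lambda payload: payload["inputs"],
    ))
```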