diff --git a/.github/workflows/docker-build-action.yaml b/.github/workflows/docker-build-action.yaml
index 62cba961..fe644056 100644
--- a/.github/workflows/docker-build-action.yaml
+++ b/.github/workflows/docker-build-action.yaml
@@ -63,6 +63,7 @@ jobs:
           push: true
           context: ${{ inputs.context }}
           build-args: ${{ inputs.build_args }}
+          target: base
           file: ${{ inputs.context }}/${{ inputs.dockerfile }}
           tags: ${{ inputs.repository }}/${{ inputs.image }}:sha-${{ env.GITHUB_SHA_SHORT }},${{ inputs.repository }}/${{ inputs.image }}:latest

diff --git a/README.md b/README.md
index f2f66b40..f3056a89 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert HF_MODEL_DIR=tmp2 HF_TASK

 ### Container

-1. build the preferred container for either CPU or GPU for PyTorch or TensorFlow.
+1. build the preferred container for either CPU or GPU for PyTorch.

 _cpu images_
 ```bash
@@ -58,6 +58,57 @@ curl --request POST \
 }'
 ```

+### Vertex AI Support
+
+The Hugging Face Inference Toolkit is also supported on Vertex AI, based on [Custom container requirements for prediction](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements). [Environment variables set by Vertex AI](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables) are automatically detected and used by the toolkit.
+
+#### Local run with HF_MODEL_ID and HF_TASK
+
+Start the Hugging Face Inference Toolkit with the following environment variables.
+
+```bash
+mkdir tmp2/
+AIP_MODE=PREDICTION AIP_PORT=8080 AIP_PREDICT_ROUTE=/pred AIP_HEALTH_ROUTE=/h HF_MODEL_DIR=tmp2 HF_MODEL_ID=distilbert/distilbert-base-uncased-finetuned-sst-2-english HF_TASK=text-classification uvicorn src.huggingface_inference_toolkit.webservice_starlette:app --port 8080
+```
+
+Send request
+
+```bash
+curl --request POST \
+  --url http://localhost:8080/pred \
+  --header 'Content-Type: application/json' \
+  --data '{
+  "instances": ["I love this product", "I hate this product"],
+  "parameters": { "top_k": 2 }
+}'
+```
+
+#### Container run with HF_MODEL_ID and HF_TASK
+
+1. build the preferred container for either CPU or GPU for PyTorch.
+
+```bash
+docker build -t vertex -f dockerfiles/pytorch/Dockerfile -t vertex-test-pytorch:gpu .
+```
+
+2. Run the container and either pass environment variables pointing to the Hub model you want to use, or mount a volume into the container where your model is stored.
+
+```bash
+docker run -ti -p 8080:8080 -e AIP_MODE=PREDICTION -e AIP_HTTP_PORT=8080 -e AIP_PREDICT_ROUTE=/pred -e AIP_HEALTH_ROUTE=/h -e HF_MODEL_ID=distilbert/distilbert-base-uncased-finetuned-sst-2-english -e HF_TASK=text-classification vertex-test-pytorch:gpu
+```
+
+3. Send request
+
+```bash
+curl --request POST \
+  --url http://localhost:8080/pred \
+  --header 'Content-Type: application/json' \
+  --data '{
+  "instances": ["I love this product", "I hate this product"],
+  "parameters": { "top_k": 2 }
+}'
+```
+

 ---

@@ -176,6 +227,7 @@ Below you'll find a list of supported and tested transformers and sentence trans
 ## ⚙ Supported Frontend

 - [x] Starlette (HF Endpoints)
+- [x] Starlette (Vertex AI)
 - [ ] Starlette (Azure ML)
 - [ ] Starlette (SageMaker)

diff --git a/dockerfiles/pytorch/Dockerfile b/dockerfiles/pytorch/Dockerfile
index 8e4c4d35..c554ce59 100644
--- a/dockerfiles/pytorch/Dockerfile
+++ b/dockerfiles/pytorch/Dockerfile
@@ -1,6 +1,6 @@
 ARG BASE_IMAGE=nvidia/cuda:12.1.0-devel-ubuntu22.04

-FROM $BASE_IMAGE
+FROM $BASE_IMAGE as base
 SHELL ["/bin/bash", "-c"]

 LABEL maintainer="Hugging Face"
@@ -45,4 +45,10 @@ COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starle
 # copy entrypoint and change permissions
 COPY --chmod=0755 scripts/entrypoint.sh entrypoint.sh

-ENTRYPOINT ["bash", "-c", "./entrypoint.sh"]
\ No newline at end of file
+ENTRYPOINT ["bash", "-c", "./entrypoint.sh"]
+
+
+FROM base as vertex
+
+# Install the required packages for Vertex AI
+RUN pip install --no-cache-dir google-cloud-storage
diff --git a/dockerfiles/tensorflow/cpu/Dockerfile b/dockerfiles/tensorflow/cpu/Dockerfile
deleted file mode 100644
index d16010bb..00000000
--- a/dockerfiles/tensorflow/cpu/Dockerfile
+++ /dev/null
@@ -1,53 +0,0 @@
-FROM ubuntu:22.04
-
-LABEL maintainer="Hugging Face"
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update \
-    && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \
-    && apt-get install -y \
-    bzip2 \
-    curl \
-    git \
-    git-lfs \
-    tar \
-    gcc \
-    g++ \
-    cmake \
-    # audio
-    libsndfile1-dev \
-    ffmpeg \
-    && apt-get clean autoremove --yes \
-    && rm -rf /var/lib/{apt,dpkg,cache,log}
-
-# install micromamba
-ENV MAMBA_ROOT_PREFIX=/opt/conda
-ENV PATH=/opt/conda/bin:$PATH
-RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
-    && touch /root/.bashrc \
-    && ./bin/micromamba shell init -s bash -p /opt/conda \
-    && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
-
-WORKDIR /app
-
-# install base python dependencies
-COPY dockerfiles/tensorflow/cpu/environment.yaml /app/environment.yaml
-RUN micromamba install -y -n base -f environment.yaml \
-    && rm environment.yaml \
-    && micromamba clean --all --yes
-
-# install huggingface inference toolkit
-COPY requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
-
-# copy application
-COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
-COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
-
-# copy entrypoint and change permissions
-COPY scripts/entrypoint.sh entrypoint.sh
-RUN chmod +x entrypoint.sh
-
-# run app
-ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
\ No newline at end of file
diff --git a/dockerfiles/tensorflow/cpu/environment.yaml b/dockerfiles/tensorflow/cpu/environment.yaml
deleted file mode 100644
index a370380c..00000000
--- a/dockerfiles/tensorflow/cpu/environment.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-name: base
-channels:
-- conda-forge
-dependencies:
-- python=3.9.13
-- tensorflow=2.9.1=*cpu*py39*
-- pip:
-  - transformers[sklearn,sentencepiece,audio,vision]==4.27.2
\ No newline at end of file
diff --git a/dockerfiles/tensorflow/gpu/Dockerfile b/dockerfiles/tensorflow/gpu/Dockerfile
deleted file mode 100644
index c42a33c0..00000000
--- a/dockerfiles/tensorflow/gpu/Dockerfile
+++ /dev/null
@@ -1,59 +0,0 @@
-FROM nvidia/cuda:11.2.2-base-ubuntu20.04
-
-LABEL maintainer="Hugging Face"
-
-ENV DEBIAN_FRONTEND=noninteractive
-ENV CONDA_OVERRIDE_CUDA="11.2"
-
-RUN apt-get update \
-    && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \
-    && apt-get install -y \
-    bzip2 \
-    curl \
-    git \
-    git-lfs \
-    tar \
-    gcc \
-    g++ \
-    cmake \
-    # audio
-    libsndfile1-dev \
-    ffmpeg \
-    && apt-get clean autoremove --yes \
-    && rm -rf /var/lib/{apt,dpkg,cache,log}
-
-# install micromamba
-ENV MAMBA_ROOT_PREFIX=/opt/conda
-ENV PATH=/opt/conda/bin:$PATH
-ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}"
-
-RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
-    && touch /root/.bashrc \
-    && ./bin/micromamba shell init -s bash -p /opt/conda \
-    && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
-
-WORKDIR /app
-
-# install base python dependencies
-COPY dockerfiles/tensorflow/gpu/environment.yaml /app/environment.yaml
-RUN micromamba install -y -n base -f environment.yaml \
-    && rm environment.yaml \
-    && micromamba clean --all --yes
-
-# install huggingface inference toolkit
-COPY requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
-
-# copy tests
-COPY . /tmp/hf-inference-test
-
-# copy application
-COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
-COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
-
-# copy entrypoint and change permissions
-COPY scripts/entrypoint.sh entrypoint.sh
-RUN chmod +x entrypoint.sh
-
-# run app
-ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
\ No newline at end of file
diff --git a/dockerfiles/tensorflow/gpu/environment.yaml b/dockerfiles/tensorflow/gpu/environment.yaml
deleted file mode 100644
index 1d886795..00000000
--- a/dockerfiles/tensorflow/gpu/environment.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: base
-channels:
-- conda-forge
-dependencies:
-- python=3.9.13
-- nvidia::cudatoolkit=11.7
-- tensorflow=2.9.1=*cuda112*py39*
-- pip:
-  - transformers[sklearn,sentencepiece,audio,vision]==4.27.2
\ No newline at end of file
diff --git a/makefile b/makefile
index a9490428..3502d83e 100644
--- a/makefile
+++ b/makefile
@@ -26,5 +26,11 @@ inference-pytorch-gpu:
 inference-pytorch-cpu:
 	docker build --build-arg="BASE_IMAGE=ubuntu:22.04" -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:cpu .

+vertex-pytorch-gpu:
+	docker build -t vertex -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:gpu .
+
+vertex-pytorch-cpu:
+	docker build -t vertex --build-arg="BASE_IMAGE=ubuntu:22.04" -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:cpu .
+
 stop-all:
 	docker stop $$(docker ps -a -q) && docker container prune --force
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index e69de29b..00000000
diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
index 8544a63c..3e62536b 100644
--- a/scripts/entrypoint.sh
+++ b/scripts/entrypoint.sh
@@ -1,13 +1,21 @@
-# /bin/bash
+#!/bin/bash

-# check if HF_MODEL_DIR is set and if not skip installing custom dependencies
+# Define the default port
+PORT=5000
+
+# Check if AIP_MODE is set and adjust the port for Vertex AI
+if [[ ! -z "${AIP_MODE}" ]]; then
+  PORT=${AIP_HTTP_PORT}
+fi
+
+# Check if HF_MODEL_DIR is set and if not skip installing custom dependencies
 if [[ ! -z "${HF_MODEL_DIR}" ]]; then
-  # check if requirements.txt exists and if so install dependencies
+  # Check if requirements.txt exists and if so install dependencies
   if [ -f "${HF_MODEL_DIR}/requirements.txt" ]; then
     echo "Installing custom dependencies from ${HF_MODEL_DIR}/requirements.txt"
     pip install -r ${HF_MODEL_DIR}/requirements.txt --no-cache-dir;
   fi
 fi

-# start the server
-uvicorn webservice_starlette:app --host 0.0.0.0 --port 5000
\ No newline at end of file
+# Start the server
+uvicorn webservice_starlette:app --host 0.0.0.0 --port ${PORT}
diff --git a/setup.py b/setup.py
index 5e99df02..deffb557 100644
--- a/setup.py
+++ b/setup.py
@@ -1,12 +1,10 @@
 from __future__ import absolute_import

-from datetime import date
 from setuptools import find_packages, setup

 # We don't declare our dependency on transformers here because we build with
 # different packages for different variants
-VERSION = "0.3.0"
-
+VERSION = "0.4.0"

 # Ubuntu packages
 # libsndfile1-dev: torchaudio requires the development version of the libsndfile package which can be installed via a system package manager. On Ubuntu it can be installed as follows: apt install libsndfile1-dev
@@ -14,11 +12,7 @@
 # libavcodec-extra : libavcodec-extra inculdes additional codecs for ffmpeg

 install_requires = [
-    "wheel==0.42.0",
-    "setuptools==69.1.0",
-    "cmake==3.28.3",
-    "transformers[sklearn,sentencepiece, audio, vision]==4.38.2",
-    "huggingface_hub==0.20.3",
+    "transformers[sklearn,sentencepiece, audio,vision]==4.41.1",
     "orjson",
     # vision
     "Pillow",
@@ -31,15 +25,14 @@
     "starlette",
     "uvicorn",
     "pandas",
-    "peft==0.9.0"
+    "peft==0.11.1"
 ]

 extras = {}

-extras["st"] = ["sentence_transformers==2.4.0"]
+extras["st"] = ["sentence_transformers==2.7.0"]
 extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"]
-extras["torch"] = ["torch==2.2.0", "torchvision", "torchaudio"]
-extras["tensorflow"] = ["tensorflow"]
+extras["torch"] = ["torch==2.2.2", "torchvision", "torchaudio"]
 extras["test"] = [
     "pytest==7.2.1",
     "pytest-xdist",
diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py
index 08368326..0a8c93b8 100644
--- a/src/huggingface_inference_toolkit/handler.py
+++ b/src/huggingface_inference_toolkit/handler.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from pathlib import Path
 from typing import Optional, Union

@@ -40,15 +41,52 @@ def __call__(self, data):
         return prediction


+class VertexAIHandler(HuggingFaceHandler):
+    """
+    A Default Vertex AI Hugging Face Inference Handler which abstracts the
+    Vertex AI specific logic for inference.
+    """
+    def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"):
+        super().__init__(model_dir, task, framework)
+
+    def __call__(self, data):
+        """
+        Handles an inference request with input data and makes a prediction.
+        Args:
+            :data: (obj): the raw request body data.
+            :return: prediction output
+        """
+        if "instances" not in data:
+            raise ValueError("The request body must contain a key 'instances' with a list of instances.")
+        parameters = data.pop("parameters", None)
+
+        predictions = []
+        # iterate over all instances and make predictions
+        for inputs in data["instances"]:
+            payload = {"inputs": inputs, "parameters": parameters}
+            predictions.append(super().__call__(payload))
+
+        # return predictions
+        return {"predictions": predictions}
+
 def get_inference_handler_either_custom_or_default_handler(
     model_dir: Path, task: Optional[str] = None
 ):
     """
-    get inference handler either custom or default Handler
+    Returns the appropriate inference handler based on the given model directory and task.
+
+    Args:
+        model_dir (Path): The directory path where the model is stored.
+        task (Optional[str]): The task for which the inference handler is required. Defaults to None.
+
+    Returns:
+        InferenceHandler: The appropriate inference handler based on the given model directory and task.
     """
     custom_pipeline = check_and_register_custom_pipeline_from_directory(model_dir)

     if custom_pipeline:
         return custom_pipeline
+    elif os.environ.get("AIP_MODE", None) == "PREDICTION":
+        return VertexAIHandler(model_dir=model_dir, task=task)
     else:
         return HuggingFaceHandler(model_dir=model_dir, task=task)
diff --git a/src/huggingface_inference_toolkit/vertex_ai_utils.py b/src/huggingface_inference_toolkit/vertex_ai_utils.py
new file mode 100644
index 00000000..19dd41e2
--- /dev/null
+++ b/src/huggingface_inference_toolkit/vertex_ai_utils.py
@@ -0,0 +1,46 @@
+import logging
+import re
+from pathlib import Path
+from typing import Union
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
+
+
+
+_logger = logging.getLogger(__name__)
+
+
+GCS_URI_PREFIX = "gs://"
+
+
+# copied from https://github.com/googleapis/python-aiplatform/blob/94d838d8cfe1599bc2d706e66080c05108821986/google/cloud/aiplatform/utils/prediction_utils.py#L121
+def _load_repository_from_gcs(artifact_uri: str, target_dir: Union[str, Path] = "/tmp"):
+    """
+    Load files from GCS path to target_dir
+    """
+    from google.cloud import storage
+    _logger.info(f"Loading model artifacts from {artifact_uri} to {target_dir}")
+    target_dir = Path(target_dir)
+
+    if artifact_uri.startswith(GCS_URI_PREFIX):
+        matches = re.match(f"{GCS_URI_PREFIX}(.*?)/(.*)", artifact_uri)
+        bucket_name, prefix = matches.groups()
+
+        gcs_client = storage.Client()
+        blobs = gcs_client.list_blobs(bucket_name, prefix=prefix)
+        for blob in blobs:
+            name_without_prefix = blob.name[len(prefix) :]
+            name_without_prefix = (
+                name_without_prefix[1:]
+                if name_without_prefix.startswith("/")
+                else name_without_prefix
+            )
+            file_split = name_without_prefix.split("/")
+            directory = target_dir.joinpath(*file_split[0:-1])
+            directory.mkdir(parents=True, exist_ok=True)
+            if name_without_prefix and not name_without_prefix.endswith("/"):
+                blob.download_to_filename(str(target_dir / name_without_prefix))
+
+    return str(target_dir.absolute())
+
diff --git a/src/huggingface_inference_toolkit/webservice_robyn.py b/src/huggingface_inference_toolkit/webservice_robyn.py
deleted file mode 100644
index 5aeaf605..00000000
--- a/src/huggingface_inference_toolkit/webservice_robyn.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import logging
-import os
-
-from robyn import Robyn
-
-from huggingface_inference_toolkit.serialization.base import ContentType
-from huggingface_inference_toolkit.serialization.json_utils import Jsoner
-
-logger = logging.getLogger(__name__)
-logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
-
-
-app = Robyn(__file__)
-
-HF_MODEL_DIR = os.environ.get("HF_MODEL_DIR", "/opt/huggingface/model")
-HF_TASK = os.environ.get("HF_TASK", None)
-
-# @app.startup_handler
-# async def startup_event():
-#     global inference_handler
-
-#     if empty_directory_or_not_hf_remote_id is None or task is None:
-#         raise ValueError(
-#             f"""Can't initialize model.
-#             Please set correct model id and task.
-#             Provided values are model_id:
-#             {model_id_or_path} and task:{task}"""
-#         )
-
-#     logger.info(f"Initializing model with model_id:{model_id_or_path} and task:{task}")
-#     # create inference handler
-#     inference_handler = HuggingFaceHandler(HF_MODEL_ID)
-#     logger.info(f"Model initialized successfully on device: {inference_handler.model.device}")
-#     return inference_handler
-
-
-@app.get("/health")
-async def health():
-    return "OK"
-
-
-@app.post("/predict")
-async def predict(request):
-    try:
-        logger.info(request)
-        content_type = request.headers.get("Content-Type", None)
-        body = ContentType.get_deserializer(content_type).deserialize(request["body"])
-        logger.info(body)
-
-        # pred = inference_handler(body["inputs"])
-        return Jsoner.serialize(body)
-    except Exception as e:
-        logger.error(e)
-        return Jsoner.serialize({"error": str(e)})
-
-
-app.start(port=5000)
diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py
index 8bc68b2e..862560dc 100644
--- a/src/huggingface_inference_toolkit/webservice_starlette.py
+++ b/src/huggingface_inference_toolkit/webservice_starlette.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from pathlib import Path
 from time import perf_counter

@@ -20,6 +21,7 @@
 from huggingface_inference_toolkit.serialization.base import ContentType
 from huggingface_inference_toolkit.serialization.json_utils import Jsoner
 from huggingface_inference_toolkit.utils import _load_repository_from_hf, convert_params_to_int_or_bool
+from huggingface_inference_toolkit.vertex_ai_utils import _load_repository_from_gcs


 def config_logging(level=logging.INFO):
@@ -35,10 +37,11 @@ def config_logging(level=logging.INFO):
 logger = logging.getLogger(__name__)


-async def some_startup_task():
+async def prepare_model_artifacts():
     global inference_handler
     # 1. check if model artifacts available in HF_MODEL_DIR
     if len(list(Path(HF_MODEL_DIR).glob("**/*"))) <= 0:
+        # 2. if not available, try to load from HF_MODEL_ID
         if HF_MODEL_ID is not None:
             _load_repository_from_hf(
                 repository_id=HF_MODEL_ID,
@@ -47,6 +50,11 @@ async def prepare_model_artifacts():
                 revision=HF_REVISION,
                 hf_hub_token=HF_HUB_TOKEN,
             )
+        # 3. check if running in a Vertex AI environment and load the model from GCS
+        # if artifactUri was not set on Model creation, AIP_STORAGE_URI is an empty string
+        elif len(os.environ.get("AIP_STORAGE_URI", '')) > 0:
+            _load_repository_from_gcs(os.environ["AIP_STORAGE_URI"], target_dir=HF_MODEL_DIR)
+        # 4. if not available, raise error
         else:
             raise ValueError(
                 f"""Can't initialize model.
@@ -72,7 +80,7 @@ async def predict(request):
         # try to deserialize payload
         deserialized_body = ContentType.get_deserializer(content_type).deserialize(await request.body())
         # checks if input schema is correct
-        if "inputs" not in deserialized_body:
+        if "inputs" not in deserialized_body and "instances" not in deserialized_body:
             raise ValueError(f"Body needs to provide a inputs key, recieved: {orjson.dumps(deserialized_body)}")

         # check for query parameter and add them to the body
@@ -97,14 +105,31 @@ async def predict(request):
         logger.error(e)
         return Response(Jsoner.serialize({"error": str(e)}), status_code=400, media_type="application/json")

-
-app = Starlette(
-    debug=True,
-    routes=[
-        Route("/", health, methods=["GET"]),
-        Route("/health", health, methods=["GET"]),
-        Route("/", predict, methods=["POST"]),
-        Route("/predict", predict, methods=["POST"]),
-    ],
-    on_startup=[some_startup_task],
+# Create app based on which cloud environment is used
+if os.getenv("AIP_MODE", None) == "PREDICTION":
+    logger.info("Running in Vertex AI environment")
+    # extract routes from environment variables
+    _predict_route = os.getenv("AIP_PREDICT_ROUTE", None)
+    _health_route = os.getenv("AIP_HEALTH_ROUTE", None)
+    if _predict_route is None or _health_route is None:
+        raise ValueError("AIP_PREDICT_ROUTE and AIP_HEALTH_ROUTE need to be set in Vertex AI environment")
+
+    app = Starlette(
+        debug=False,
+        routes=[
+            Route(_health_route, health, methods=["GET"]),
+            Route(_predict_route, predict, methods=["POST"]),
+        ],
+        on_startup=[prepare_model_artifacts],
+    )
+else:
+    app = Starlette(
+        debug=False,
+        routes=[
+            Route("/", health, methods=["GET"]),
+            Route("/health", health, methods=["GET"]),
+            Route("/", predict, methods=["POST"]),
+            Route("/predict", predict, methods=["POST"]),
+        ],
+        on_startup=[prepare_model_artifacts],
 )
diff --git a/tests/unit/test_diffusers.py b/tests/unit/test_diffusers.py
index 0f2890a8..4384cd4e 100644
--- a/tests/unit/test_diffusers.py
+++ b/tests/unit/test_diffusers.py
@@ -15,7 +15,7 @@ def test_get_diffusers_pipeline():
     with tempfile.TemporaryDirectory() as tmpdirname:
         storage_dir = _load_repository_from_hf(
-            "hf-internal-testing/tiny-stable-diffusion-torch",
+            "echarlaix/tiny-random-stable-diffusion-xl",
             tmpdirname,
             framework="pytorch"
         )
@@ -28,7 +28,7 @@ def test_pipe_on_gpu():
     with tempfile.TemporaryDirectory() as tmpdirname:
         storage_dir = _load_repository_from_hf(
-            "hf-internal-testing/tiny-stable-diffusion-torch",
+            "echarlaix/tiny-random-stable-diffusion-xl",
             tmpdirname,
             framework="pytorch"
         )
@@ -44,7 +44,7 @@ def test_text_to_image_task():
     with tempfile.TemporaryDirectory() as tmpdirname:
         storage_dir = _load_repository_from_hf(
-            "hf-internal-testing/tiny-stable-diffusion-torch",
+            "echarlaix/tiny-random-stable-diffusion-xl",
             tmpdirname,
             framework="pytorch"
         )
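Below is a minimal, self-contained sketch of the request mapping that the new `VertexAIHandler` in the patch performs: a Vertex AI body of the form `{"instances": [...], "parameters": {...}}` is unpacked into one `{"inputs": ..., "parameters": ...}` payload per instance, and the results are wrapped in a `predictions` key. The `vertex_predict` and `run_pipeline` names are illustrative stand-ins, not part of the toolkit.

```python
from typing import Any, Callable, Dict, List


def vertex_predict(data: Dict[str, Any], run_pipeline: Callable[[Dict[str, Any]], Any]) -> Dict[str, List[Any]]:
    """Unpack a Vertex AI request body and collect per-instance predictions."""
    if "instances" not in data:
        raise ValueError("The request body must contain a key 'instances' with a list of instances.")
    parameters = data.get("parameters")

    predictions = []
    for inputs in data["instances"]:
        # each instance becomes a regular toolkit payload
        predictions.append(run_pipeline({"inputs": inputs, "parameters": parameters}))

    # Vertex AI expects the response wrapped in a "predictions" key
    return {"predictions": predictions}


if __name__ == "__main__":
    # stand-in pipeline that simply echoes its input
    print(vertex_predict(
        {"instances": ["I love this product", "I hate this product"], "parameters": {"top_k": 2}},
        run_pipeline=lambda payload: payload["inputs"],
    ))
```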