In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - Stable Diffusion XL 1.0 - LoRA serving

## Overview

This notebook demonstrates how to download popular LoRA (Low-Rank Adaptation) adapters from huggingface.co or civitai.com, and serve them together with the [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) model on Vertex AI for online prediction.

### Objective

- Deploy the base model and the LoRA adapter to a [Vertex AI Endpoint resource](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints).

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Before you begin

**NOTE**:

*  Jupyter runs lines prefixed with `!` as shell commands, and it interpolates Python variables prefixed with `$` into these commands.
*  This notebook demonstrates how to deploy a LoRA adapter together with the model [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) on a Vertex AI prediction endpoint.

### Set the following variables for the experiment environment:

In [None]:
# Cloud project ID, passed to `aiplatform.init` as `project`.
PROJECT_ID = ""  # @param {type:"string"}

# The region you want to launch jobs in, passed to `aiplatform.init` as `location`.
REGION = ""  # @param {type:"string"}

# The Cloud Storage bucket for storing experiments output. It is used as the
# Vertex AI staging bucket and, when the LoRA adapter is downloaded from civitai,
# as the upload target (`gs://<bucket>/lora-adapter`).
# Fill it without the 'gs://' prefix.
GCS_BUCKET = ""  # @param {type:"string"}

# The service account passed to `model.deploy(service_account=...)` when the
# model is deployed to the endpoint. The service account looks like:
# '<account_name>@<project>.iam.gserviceaccount.com'
SERVICE_ACCOUNT = ""  # @param {type:"string"}

# LoRA source. True: reference the adapter directly by its Hugging Face repo id.
# False: download the adapter from civitai, upload it to GCS_BUCKET and serve it
# from that GCS path.
HUGGINGFACE_MODE = False  # @param {type:"boolean"}

Initialize Vertex AI API:

In [None]:
from google.cloud import aiplatform

# GCS_BUCKET is entered without the 'gs://' prefix, but `staging_bucket` expects a
# full Cloud Storage URI, so the scheme is added here.
aiplatform.init(
    project=PROJECT_ID, location=REGION, staging_bucket=f"gs://{GCS_BUCKET}"
)

### Define constants

In [None]:
# The pre-built serving docker image. It contains serving scripts and models.
# `deploy_model` passes this URI to `aiplatform.Model.upload` as
# `serving_container_image_uri`; the tag pins a specific image build.
SERVE_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-diffusers-serve-opt:20240409_0836_RC00"

### Define common functions

In [None]:
import base64
import glob
import os
from io import BytesIO
from google.cloud import aiplatform, storage
from PIL import Image


def base64_to_image(image_str):
    """Decode a base64-encoded image payload into an image object.

    Args:
        image_str: Base64 text (or bytes) containing an encoded image file.

    Returns:
        Whatever `Image.open` returns for the decoded bytes.
    """
    raw_bytes = base64.b64decode(image_str)
    buffer = BytesIO(raw_bytes)
    return Image.open(buffer)


def image_grid(imgs, rows=2, cols=2):
    """Tile images into a single white-background grid image.

    Images are placed left to right, top to bottom. Every cell is sized from the
    first image, and each column reserves a 10-pixel horizontal gap after it.

    Args:
        imgs: Non-empty sequence of images exposing `.size` as (width, height).
            Cell dimensions are taken from `imgs[0]`.
        rows: Number of grid rows used to size the canvas.
        cols: Number of grid columns.

    Returns:
        A new RGB image of size (cols * (width + 10), rows * height).

    Raises:
        IndexError: If `imgs` is empty.
    """
    gap = 10  # horizontal padding (pixels) reserved after every column
    w, h = imgs[0].size
    grid = Image.new(
        mode="RGB", size=(cols * w + gap * cols, rows * h), color=(255, 255, 255)
    )
    for i, img in enumerate(imgs):
        # The gap must scale with the column index, not the running index `i`;
        # otherwise every row after the first is shifted right and the last
        # tile runs past the canvas edge.
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * (w + gap), row * h))
    return grid


def deploy_model(
    model_id,
    lora_id,
    machine_type="g2-standard-8",
    accelerator_type="NVIDIA_L4",
    accelerator_count=1,
):
    """Upload the serving container as a model and deploy it to a new endpoint.

    Reads the module-level `SERVE_DOCKER_URI` (serving image) and
    `SERVICE_ACCOUNT` (identity used for the deployment).

    Args:
        model_id: Base model identifier; exported to the serving container as
            `MODEL_ID` and also used as the display name of the model and
            (with an `-endpoint` suffix) of the endpoint.
        lora_id: LoRA adapter identifier; exported to the container as `LORA_ID`.
        machine_type: Machine type passed to `Model.deploy`.
        accelerator_type: Accelerator type passed to `Model.deploy`.
        accelerator_count: Number of accelerators passed to `Model.deploy`.

    Returns:
        A `(model, endpoint)` tuple with the uploaded model and the endpoint it
        was deployed to.
    """
    model_name = model_id
    endpoint = aiplatform.Endpoint.create(display_name=f"{model_name}-endpoint")
    # The serving container is configured purely through environment variables.
    serving_env = {
        "MODEL_ID": model_id,
        "LORA_ID": lora_id,
        "TASK": "text-to-image-sdxl",
    }
    model = aiplatform.Model.upload(
        display_name=model_name,
        serving_container_image_uri=SERVE_DOCKER_URI,
        serving_container_ports=[7080],
        serving_container_predict_route="/predictions/diffusers_serving",
        serving_container_health_route="/ping",
        serving_container_environment_variables=serving_env,
    )
    model.deploy(
        endpoint=endpoint,
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        deploy_request_timeout=1800,
        service_account=SERVICE_ACCOUNT,
    )
    return model, endpoint


def get_bucket_and_blob_name(filepath):
    """Split a `gs://<bucket-name>/<blob-name>` path into its two parts.

    Args:
        filepath: Cloud Storage path containing the `gs://` scheme.

    Returns:
        A `(bucket_name, blob_name)` tuple. `blob_name` is the empty string when
        the path names only a bucket (e.g. `gs://my-bucket`), so the result can
        always be unpacked into two values.

    Raises:
        IndexError: If `filepath` does not contain `gs://`.
    """
    gs_suffix = filepath.split("gs://", 1)[1]
    # `partition` always yields both sides, unlike `split("/", 1)`, which
    # returns a single element when there is no "/" after the bucket name.
    bucket_name, _, blob_name = gs_suffix.partition("/")
    return bucket_name, blob_name


def upload_local_dir_to_gcs(local_dir_path, gcs_dir_path):
    """Uploads files in a local directory tree to a GCS directory.

    Every regular file below `local_dir_path` (including files in nested
    sub-directories) is uploaded under `gcs_dir_path`, keeping its path relative
    to `local_dir_path`. One line is printed per uploaded file.

    Args:
        local_dir_path: Local directory whose files are uploaded.
        gcs_dir_path: Destination of the form `gs://<bucket-name>/<prefix>`.
    """
    client = storage.Client()
    # Parse the bucket with the same helper used for blob names so both agree.
    bucket_name = get_bucket_and_blob_name(gcs_dir_path)[0]
    bucket = client.get_bucket(bucket_name)
    gcs_dir_prefix = gcs_dir_path.rstrip("/")
    # `**` only descends into sub-directories when recursive=True; without it
    # the pattern behaves like `*` and nested files are silently skipped.
    for local_file in glob.glob(local_dir_path + "/**", recursive=True):
        if not os.path.isfile(local_file):
            continue
        # GCS object names always use "/" regardless of the local path separator.
        filename = os.path.relpath(local_file, local_dir_path).replace(os.sep, "/")
        gcs_file_path = f"{gcs_dir_prefix}/{filename}"
        blob_name = get_bucket_and_blob_name(gcs_file_path)[1]
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(local_file)
        print("Copied {} to {}.".format(local_file, gcs_file_path))

## Upload and Deploy models

This section uploads the model to Model Registry and deploys it to a Vertex AI Endpoint resource.

The model deployment step will take ~30 minutes to complete.

### Text-to-image

Deploy the stable diffusion xl model for the text-to-image task. In this example, we deploy the model together with a LoRA.

The `lora_id` can be either a resource name on [Huggingface.co](https://huggingface.co) or a GCS path to which the LoRA adapter was downloaded beforehand. For example:
* [Artificialguybr/ToyRedmond-ToyLoraForSDXL10](https://huggingface.co/artificialguybr/ToyRedmond-ToyLoraForSDXL10)
* Download the [ToyRedmond](https://civitai.com/models/125315/toyredmond-toy-lora-for-sd-xl-10) from Civitai.com and save it to a GCS bucket.

Once deployed, you can send a batch of text prompts to the endpoint to generate images.

Please note that this step typically takes 15-30 minutes.

In [None]:
if(HUGGINGFACE_MODE):
  #Use a LoRA adapter from huggingface.co
  model, endpoint = deploy_model(
      model_id="stabilityai/stable-diffusion-xl-base-1.0",
      lora_id="artificialguybr/ToyRedmond-ToyLoraForSDXL10",
  )
else:
  #OR download a LoRA adapter first and save it to a GCS bucket
  ! rm -r /tmp/lora-adapter
  ! mkdir /tmp/lora-adapter

  url = "https://civitai.com/api/download/models/136880"  # @param {type:"string"}
  destination_folder = "/tmp/lora-adapter"
  file_name = "ToyRedmond-FnkRedmAF.safetensors"  # @param {type:"string"}

  target = f"{destination_folder}/{file_name}"

  !gdown --fuzzy -O $target "$url"
  upload_local_dir_to_gcs("/tmp/lora-adapter", f"gs://{GCS_BUCKET}/lora-adapter")

  model, endpoint = deploy_model(
      model_id="stabilityai/stable-diffusion-xl-base-1.0",
      lora_id=f"gs://{GCS_BUCKET}/lora-adapter",
  )


### Print Model ID and Endpoint:

In [None]:
# `resource_name` is the fully qualified resource path, while `name` is the short
# ID, so the two lines for each resource are labelled differently.
print(f"Model resource name: {model.resource_name}")
print(f"Model ID: {model.name} -> Take a note to copy this in other notebooks")
print(f"Endpoint resource name: {endpoint.resource_name}")
print(f"Endpoint ID: {endpoint.name} -> Take a note to copy this in other notebooks")