In [1]:
%%writefile requirements.txt
# Python packages used to archive, serve, and deploy the fine-tuned diffusion model.
# NOTE(review): tensorboard is the only pinned package, and 1.15.0 is far older than
# the releases pip resolves for the unpinned ones (e.g. torch 2.5.1) - confirm the
# pin is still required.
diffusers
ftfy
google-cloud-aiplatform
gradio
ninja
tensorboard==1.15.0
torch
torchaudio
torchvision
torchserve
torch-model-archiver
torch-workflow-archiver
transformers

Overwriting requirements.txt


In [1]:
# Install every package listed in requirements.txt (written by the previous cell).
%pip install -r requirements.txt

Collecting diffusers (from -r requirements.txt (line 1))
  Using cached diffusers-0.31.0-py3-none-any.whl.metadata (18 kB)
Collecting ftfy (from -r requirements.txt (line 2))
  Using cached ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting gradio (from -r requirements.txt (line 4))
  Using cached gradio-5.7.1-py3-none-any.whl.metadata (16 kB)
Collecting ninja (from -r requirements.txt (line 5))
  Using cached ninja-1.11.1.2-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.3 kB)
Collecting tensorboard==1.15.0 (from -r requirements.txt (line 6))
  Using cached tensorboard-1.15.0-py3-none-any.whl.metadata (1.8 kB)
Collecting torch (from -r requirements.txt (line 7))
  Using cached torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchaudio (from -r requirements.txt (line 8))
  Using cached torchaudio-2.5.1-cp310-cp310-manylinux1_x86_64.whl.metadata (6.4 kB)
Collecting torchvision (from -r requirements.txt (line 9))
  Using cached torchvis

In [3]:
PROJECT_ID = "assignment1-438116"  # @param {type:"string"}

# Set the project id
! gcloud config set project {PROJECT_ID}

REGION = "us-central1"

Updated property [core/project].


In [2]:

import base64
import math

import torch
from diffusers import StableDiffusionPipeline
from google.cloud import aiplatform
from IPython import display
from PIL import Image
from torch import autocast

In [36]:
# Cloud Storage locations: the bucket, its gs:// root, and the artifact folder
# later passed to Vertex AI as the model's artifact_uri.
BUCKET_NAME = "text-to-image-model-429503"
BUCKET_URI = "gs://" + BUCKET_NAME + "/"
FULL_GCS_PATH = BUCKET_URI + "model_artifacts"

In [6]:
!gsutil -m cp -r gs://text-to-image-model-429503/* .

Copying gs://text-to-image-model-429503/model/checkpoint-500/optimizer.bin...
Copying gs://text-to-image-model-429503/model/checkpoint-500/random_states_0.pkl...
Copying gs://text-to-image-model-429503/model/checkpoint-500/scaler.pt...       
Copying gs://text-to-image-model-429503/model/checkpoint-500/scheduler.bin...   
Copying gs://text-to-image-model-429503/model/checkpoint-500/unet/config.json...
Copying gs://text-to-image-model-429503/model/checkpoint-500/unet/diffusion_pytorch_model.safetensors...
Copying gs://text-to-image-model-429503/model/checkpoint-500/unet_ema/config.json...
Copying gs://text-to-image-model-429503/model/checkpoint-500/unet_ema/diffusion_pytorch_model.safetensors...
Copying gs://text-to-image-model-429503/model/feature_extractor/preprocessor_config.json...
Copying gs://text-to-image-model-429503/model/logs/text2image-fine-tune/1732728687.3225884/events.out.tfevents.1732728687.5d19749c37f3.259.1...
Copying gs://text-to-image-model-429503/model/safety_checker

In [7]:
!mkdir model_artifacts

In [10]:
# Move the downloaded `model` directory under model_artifacts/.
# NOTE(review): not re-runnable - a second run fails once `model` has been moved.
!mv model model_artifacts

In [3]:
%%writefile handler.py

import base64
import logging
from io import BytesIO

import torch
from diffusers import EulerDiscreteScheduler
from diffusers import StableDiffusionPipeline
from ts.torch_handler.base_handler import BaseHandler

# Module-level logger; the handler uses it to record incoming requests.
logger = logging.getLogger(__name__)
# Location handed to `from_pretrained` for both the scheduler and the pipeline.
# NOTE(review): this is an absolute path - confirm the serving container really
# exposes the fine-tuned weights at /model.
model_id = '/model'


class ModelHandler(BaseHandler):
  """TorchServe handler serving a Stable Diffusion text-to-image pipeline.

  `initialize` loads the pipeline from the module-level `model_id` location.
  Each request is read as a mapping with a 'prompt' entry, and every generated
  image is returned as a base64-encoded JPEG string.
  """

  def __init__(self):
    # Run BaseHandler's constructor first so any state it defines exists on
    # this instance before the handler-specific fields are set.
    super().__init__()
    self.initialized = False
    self.map_location = None
    self.device = None
    self.use_gpu = True
    self.store_avg = True
    self.pipe = None

  def initialize(self, context):
    """Loads the pipeline and places it on the device assigned to this worker.

    Args:
      context: TorchServe context. `context.system_properties['gpu_id']` picks
        the CUDA device when CUDA is available.
    """
    properties = context.system_properties
    gpu_id = properties.get('gpu_id')

    if torch.cuda.is_available():
      # Fall back to device 0 when no GPU id was assigned; 'cuda:None' is not
      # a valid device string.
      cuda_index = 0 if gpu_id is None else gpu_id
      self.map_location = 'cuda'
      self.device = torch.device('cuda:' + str(cuda_index))
      self.use_gpu = True
      torch_dtype = torch.float16
    else:
      self.map_location = 'cpu'
      self.device = torch.device('cpu')
      self.use_gpu = False
      # Half precision is only requested on GPU; use full precision on CPU.
      torch_dtype = torch.float32

    # Use the Euler scheduler here instead
    scheduler = EulerDiscreteScheduler.from_pretrained(model_id,
                                                       subfolder='scheduler')
    pipe = StableDiffusionPipeline.from_pretrained(model_id,
                                                   scheduler=scheduler,
                                                   torch_dtype=torch_dtype)
    # Move to the specific device chosen above rather than the default 'cuda',
    # so the assigned gpu_id is honoured.
    pipe = pipe.to(self.device)

    # Uncomment the following line to reduce the GPU memory usage.
    # pipe.enable_attention_slicing()
    self.pipe = pipe

    self.initialized = True

  def preprocess(self, requests):
    """Nothing to do here: logs the requests and passes them through unchanged."""
    logger.info('requests: %s', requests)
    return requests

  def inference(self, preprocessed_data, *args, **kwargs):
    """Runs the pipeline once per request.

    Args:
      preprocessed_data: iterable of request mappings, each with a 'prompt' key.

    Returns:
      A flat list of the images produced for all requests, in request order.
    """
    images = []
    for request in preprocessed_data:
      prompt = request['prompt']
      images.extend(self.pipe(prompt).images)
    return images

  def postprocess(self, output_batch):
    """Converts the images to base64 strings.

    Args:
      output_batch: iterable of images exposing `save(fp, format=...)`.

    Returns:
      A list with one base64-encoded JPEG string per image.
    """
    encoded_images = []
    for image in output_batch:
      # The context manager releases the buffer even if encoding fails.
      with BytesIO() as buffer:
        image.save(buffer, format='JPEG')
        encoded_images.append(
            base64.b64encode(buffer.getvalue()).decode('utf-8'))
    return encoded_images

Overwriting handler.py


In [5]:
# List the working directory to check which files are present before archiving.
!ls

Untitled.ipynb	model.mar	 requirements.txt  tutorials
handler.py	model_artifacts  src


In [9]:
# Package handler.py into a TorchServe model archive for the model named `sdft`.
# NOTE(review): only the handler is passed to the archiver here - the model
# weights are not bundled, so confirm the serving container finds them at the
# path handler.py loads from.
!torch-model-archiver \
  -f \
  --model-name sdft \
  --version 1.0 \
  --handler handler.py \
  --export-path model_artifacts/model

In [21]:
rm handler.py

In [27]:
!gsutil cp model.mar gs://text-to-image-model-429503/

Copying file://model.mar [Content-Type=application/octet-stream]...
/ [1 files][  1.2 KiB/  1.2 KiB]                                                
Operation completed over 1 objects/1.2 KiB.                                      


In [8]:
!pip install torchserve torch-model-archiver




In [10]:
# Start TorchServe locally against the archive in model_artifacts/model.
# NOTE(review): the recorded output shows this failing with "java not found";
# a Java runtime (JAVA_HOME) must be available before this cell can work.
!torchserve --start --model-store model_artifacts/model --models model=sdft.mar


java not found, please make sure JAVA_HOME is set properly.


In [28]:
!gsutil mv gs://text-to-image-model-429503/model gs://text-to-image-model-429503/model_artifacts 

Copying gs://text-to-image-model-429503/model/checkpoint-500/optimizer.bin [Content-Type=application/octet-stream]...
Removing gs://text-to-image-model-429503/model/checkpoint-500/optimizer.bin...  
Copying gs://text-to-image-model-429503/model/checkpoint-500/random_states_0.pkl [Content-Type=application/octet-stream]...
Removing gs://text-to-image-model-429503/model/checkpoint-500/random_states_0.pkl...
Copying gs://text-to-image-model-429503/model/checkpoint-500/scaler.pt [Content-Type=application/vnd.snesdev-page-table]...
Removing gs://text-to-image-model-429503/model/checkpoint-500/scaler.pt...      
Copying gs://text-to-image-model-429503/model/checkpoint-500/scheduler.bin [Content-Type=application/octet-stream]...
Removing gs://text-to-image-model-429503/model/checkpoint-500/scheduler.bin...  

==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" fo

In [30]:
!gsutil mv gs://text-to-image-model-429503/model.mar gs://text-to-image-model-429503/model_artifacts 

Copying gs://text-to-image-model-429503/model.mar [Content-Type=application/octet-stream]...
Removing gs://text-to-image-model-429503/model.mar...                           

Operation completed over 1 objects/1.2 KiB.                                      


In [31]:
# Container image passed as serving_container_image_uri when uploading the model.
PYTORCH_PREDICTION_IMAGE_URI = "us-docker.pkg.dev/vertex-ai/prediction/pytorch-gpu.1-12:latest"

# Names and description shown for the uploaded model and its endpoint.
APP_NAME = "stable-diffusion-finetune"
VERSION = 1

MODEL_DISPLAY_NAME = "stable-diffusion-finetune"
MODEL_DESCRIPTION = "finetuned stable_diffusion_1_4"
ENDPOINT_DISPLAY_NAME = APP_NAME + "-endpoint"


In [32]:
# Initialise the Vertex AI SDK for this project and region. staging_bucket is
# expected as a gs:// URI, so the bare bucket name is prefixed here.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=f"gs://{BUCKET_NAME}",
)

In [33]:
!pip install google-cloud-aiplatform



In [37]:
# Register the model with Vertex AI: artifacts come from the Cloud Storage
# folder and are served with the container image configured earlier.
model = aiplatform.Model.upload(
    artifact_uri=FULL_GCS_PATH,
    serving_container_image_uri=PYTORCH_PREDICTION_IMAGE_URI,
    display_name=MODEL_DISPLAY_NAME,
    description=MODEL_DESCRIPTION,
)

# Block until the upload has completed before reading the model's fields.
model.wait()

print(model.display_name, model.resource_name, sep="\n")

Creating Model
Create Model backing LRO: projects/684086472873/locations/us-central1/models/4029337381357223936/operations/3768473196785827840
Model created. Resource name: projects/684086472873/locations/us-central1/models/4029337381357223936@1
To use this Model in another session:
model = aiplatform.Model('projects/684086472873/locations/us-central1/models/4029337381357223936@1')
stable-diffusion-finetune
projects/684086472873/locations/us-central1/models/4029337381357223936


In [38]:
# Create the Vertex AI endpoint that the model is deployed to further down.
endpoint = aiplatform.Endpoint.create(
    display_name=ENDPOINT_DISPLAY_NAME,
)

Creating Endpoint
Create Endpoint backing LRO: projects/684086472873/locations/us-central1/endpoints/9125006428099051520/operations/5064946939515109376
Endpoint created. Resource name: projects/684086472873/locations/us-central1/endpoints/9125006428099051520
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/684086472873/locations/us-central1/endpoints/9125006428099051520')


In [None]:
from google.cloud import aiplatform

In [None]:
!gcloud ai endpoints describe 8044705467483553792 \
    --region=us-central1


In [None]:
# Deploy the uploaded model to the endpoint on a single GPU-backed machine.
# In a fresh session, rebuild the handles first with
# aiplatform.Model(<model resource name>) and
# aiplatform.Endpoint(<endpoint resource name>).
model.deploy(
    endpoint=endpoint,
    deployed_model_display_name=MODEL_DISPLAY_NAME,
    # Serving hardware.
    machine_type="n1-standard-8",
    accelerator_type="NVIDIA_TESLA_P100",
    accelerator_count=1,
    # Run synchronously, with an extended timeout on the deploy request.
    sync=True,
    deploy_request_timeout=5000,
)