In [None]:
!pip install --upgrade google-cloud-aiplatform

In [8]:
import os
PROJECT = "sandbox-373102"
REGION = "us-central1"
PACKAGE_PATH = "package"
ARTIFACT_REGISTRY_ROOT_PATH = "base"
BASE_CONTAINER = f"{REGION}-docker.pkg.dev/{PROJECT}/{ARTIFACT_REGISTRY_ROOT_PATH}/transformer:v1"

os.system(f"mkdir -p {PACKAGE_PATH}")

0

In [3]:
%%writefile Dockerfile

FROM python:3.10-slim-buster #nvidia/cuda:12.4.1-runtime-ubuntu22.04 #use for GPU
RUN apt-get -y update
RUN apt-get -y install python3 python3-pip
RUN ln -s /usr/bin/python3 /usr/bin/python
RUN pip install scikit-learn==1.3.0 numpy==1.26.0 pandas==2.1.0 joblib==1.3.0
ENV VERTEX_CPR_MAX_WORKERS 1

Overwriting Dockerfile


In [4]:
os.system(f"gcloud builds submit --region={REGION} --tag={BASE_CONTAINER} --machine-type=E2_HIGHCPU_32")

1

In [5]:
!gcloud auth configure-docker {REGION}-docker.pkg.dev --quiet

`docker` and `docker-credential-gcloud` need to be in the same PATH in order to work correctly together.
gcloud's Docker credential helper can be configured but it will not work until this is corrected.
Adding credentials for: us-central1-docker.pkg.dev
Docker configuration file updated.


In [6]:
#setting below turns all container build and localmodel logs
import logging
logging.basicConfig(level=logging.INFO)

In [9]:
%%writefile {PACKAGE_PATH}/requirements.txt

Pillow
numpy < 2.0

Writing package/requirements.txt


In [10]:
%%writefile {PACKAGE_PATH}/CustomTaxiPredictor.py

import os
import logging
import time
from google.cloud.aiplatform.prediction.predictor import Predictor
from google.cloud.aiplatform.utils import prediction_utils
import base64
import io
import json
import joblib

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

class CustomTaxiPredictor(Predictor):
    def __init__(self):
        return

    def load(self, artifacts_uri: str) -> None:
        # Load model
        logger.info(f"Starting predictor using {artifacts_uri}")
        origin_path = os.getcwd()
        model_path = f"{origin_path}/model"
        os.makedirs(model_path)
        os.chdir(model_path)
        prediction_utils.download_model_artifacts(artifacts_uri)
        os.chdir(origin_path)
        logger.debug('Start model loading...')
        self.model =  joblib.load(f"{model_path}/model.joblib")
        logger.debug('Model loaded successfully')

    def predict(self, prediction_input):
        start_time = time.time()
        predictions = self.model.predict(prediction_input)
        logger.info("--- %s seconds ---" % (time.time() - start_time))
        return {"predictions": []}

Writing package/CustomTaxiPredictor.py


In [None]:
#Install packages for local testing
!pip install scikit-learn==1.3.0 numpy==1.26.0 pandas==2.1.0 joblib==1.3.0

In [None]:
from package.VAIMaskGenerationPredictor import VAIMaskGenerationPredictor
from google.cloud.aiplatform.prediction import LocalModel
#add custom predictor
vai_serving_container_uri = f"{REGION}-docker.pkg.dev/{PROJECT}/custom-inference-gpu/vai-transformer-mask-generator"

#must secure sufficient space
local_model = LocalModel.build_cpr_model(
    src_dir=PACKAGE_PATH,
    output_image_uri=vai_serving_container_uri,
    predictor=VAIMaskGenerationPredictor,
    requirements_path=f"{PACKAGE_PATH}/requirements.txt",
    #extra_packages=["deploy_package/custom_package.tar.gz"]
    base_image=f"{BASE_CONTAINER}",
    no_cache = False
)

In [None]:
import base64
from PIL import Image
from io import BytesIO

def openImage(path):
    return Image.open(path).convert("RGB")

def image_to_base64(image):
    buffer = BytesIO()
    image.save(buffer, format="JPEG")
    image_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return image_str

In [None]:
raw_image = openImage("car.png")
raw_image

In [None]:
import json
import sys
import logging
logging.basicConfig(level=logging.INFO, stream=sys.stderr)

test_artifact_uri = "gs://jk-model-repo/facebook/sam-vit-large"
local_endpoint = local_model.deploy_to_local_endpoint(artifact_uri=test_artifact_uri, gpu_count=-1)
local_endpoint.serve()

In [None]:
request = {
        "instances" : [
            {
            #"image": image_to_base64(raw_image),
            "image_uri": "car.png",
            "input_boxes": [[650, 900, 1000, 1250], [2050, 800, 2400, 1150]]
            }
        ]
    }
predict_response = local_endpoint.predict(
        request=json.dumps(request),
        headers={"Content-Type": "application/json"},
    )
predict_response.json()['predictions'][0]['scores']

In [None]:
print(local_endpoint.container.logs().decode("utf-8").strip(), sep="\n")

In [None]:
import matplotlib.pyplot as plt
#image = torch.zeros(480, 640, 3, dtype=torch.uint8) #black color
#image[:, :, :] = 255 #white color
import numpy as np
base = np.zeros((raw_image.height, raw_image.width, 3), np.uint8) #black color
base[:] = (255, 255, 255) #white color
for mask in predict_response.json()['predictions'][0]['masks']:
    #image = np.array(mask).reshape(height, width, 1)
    #np.copyto(base, image, where=(image != 0))
    base += np.array(mask).reshape(raw_image.height, raw_image.width, 1)
base = 255 - base #inverse color
plt.imshow(base)

In [None]:
local_endpoint.stop()

In [None]:
import matplotlib.pyplot as plt
import numpy as np    

output = np.array(raw_image) / 255
for mask in predict_response.json()['predictions'][0]['masks']:
    color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    image = (np.array(mask).reshape(raw_image.height, raw_image.width, 1) * color.reshape(1, 1, -1))[:, :, :3]
    np.copyto(output, image, where=(image != 0))
plt.imshow(output)

In [None]:
#Push locally built image to artifact registry for deploy
local_model.push_image()

In [None]:
#Make model be used for Model repository
from google.cloud import aiplatform
model = aiplatform.Model.upload(
    location=REGION,
    display_name = "sam_bbox",
    local_model = local_model,
    artifact_uri = test_artifact_uri,
    #parent_model = prev_model.resource_name,
    #is_default_version=True,
    serving_container_environment_variables={
        # Optional env var so that `uvicorn` only runs the model in 1 worker
        "VERTEX_CPR_MAX_WORKERS": 4,
    },
)

In [None]:
#Create endpoint for model hosting
remote_endpoint = aiplatform.Endpoint.create(
    display_name=f"SAM test endpoint",
    #labels={"sample-key": "sample-value"},
    location=REGION,
    dedicated_endpoint_enabled=True,
)

In [None]:
#Deploy model under the endpoint
remote_endpoint.deploy(
    model=model,
    machine_type="g2-standard-4",
    #tpu_topology=None,
    min_replica_count=1,
    max_replica_count=1,
    service_account="1045259343465-compute@developer.gserviceaccount.com",
    #traffic_percentage=50
    #traffic_split={'a':50, 'b':50}
    #Configs for GPU
    accelerator_type="NVIDIA_L4",
    accelerator_count=1,
    #deploy_request_timeout=DEPLOY_TIMEOUT
)

In [None]:
instances = [
    {
        #"image": image_to_base64(raw_image),
        "image_uri": "car.png",
        "input_boxes": [[650, 900, 1000, 1250], [2050, 800, 2400, 1150]]
    }
]
predict_response = remote_endpoint.predict(
    instances=instances,
    use_dedicated_endpoint = True
)
predict_response.predictions[0]['scores']