In [None]:
!pip install --upgrade google-cloud-aiplatform

In [None]:
import os
# --- Notebook-wide settings: replace the YOUR_* placeholders before running ---
PROJECT = "YOUR_PROJECT_ID"
REGION = "us-central1"
# Local folder that receives the predictor source and requirements.txt
PACKAGE_PATH = "package"
# Artifact Registry repository that stores the container images
REPO_NAME = "base"
# Full image URI of the base container built from the Dockerfile in this notebook
BASE_CONTAINER = f"{REGION}-docker.pkg.dev/{PROJECT}/{REPO_NAME}/scikit-learn:v1"
SERVICE_ACCOUNT = "YOUR_SERVICE_ACCOUNT"
BUCKET_NAME = f"{PROJECT}-vai"
# GCS location passed as the model artifact URI for local and remote serving
MODEL_PATH = f"gs://{BUCKET_NAME}/"

# Create the package folder without spawning a shell; a no-op when it already
# exists, and a real failure (e.g. permissions) raises instead of being ignored.
os.makedirs(PACKAGE_PATH, exist_ok=True)

In [None]:
# Create the Docker-format repository in Artifact Registry that will hold the images.
# The shell exit status is the cell's output (non-zero e.g. when the repo already exists).
create_repo_cmd = (
    f"gcloud artifacts repositories create {REPO_NAME}"
    f" --location={REGION} --repository-format=DOCKER"
)
os.system(create_repo_cmd)

In [None]:
%%writefile Dockerfile

# Base image for the serving container (CPU build).
FROM python:3.8
#FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 #use for GPU
# Pinned numeric/ML stack. --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir numpy==1.18.5
RUN pip install --no-cache-dir scikit-learn==1.0.2 joblib==0.15.1
# Default worker cap for the prediction server. Uses the key=value form;
# the space-separated `ENV key value` form is legacy syntax.
ENV VERTEX_CPR_MAX_WORKERS=1

In [None]:
# Submit a Cloud Build job and tag the resulting image as BASE_CONTAINER.
# Additionally you can specify build machine type for faster build.
build_cmd = " ".join(
    ["gcloud builds submit", f"--region={REGION}", f"--tag={BASE_CONTAINER}"]
)
os.system(build_cmd)

In [None]:
# Configure local Docker authentication for this region's Artifact Registry host
# ({REGION}-docker.pkg.dev); --quiet suppresses the interactive confirmation.
!gcloud auth configure-docker {REGION}-docker.pkg.dev --quiet

In [None]:
# Enable INFO-level logging so that container build and LocalModel logs are shown
import logging
logging.basicConfig(level=logging.INFO)

In [None]:
%%writefile {PACKAGE_PATH}/requirements.txt

# Extra dependency for the serving image; this file is passed as requirements_path to build_cpr_model
xgboost==1.6.2

In [None]:
%%writefile {PACKAGE_PATH}/CustomTaxiPredictor.py

import os
import logging
import time
from google.cloud.aiplatform.prediction.predictor import Predictor
from google.cloud.aiplatform.utils import prediction_utils
import base64
import io
import json
import joblib

# Module-level logger; set to DEBUG so this logger does not filter out the
# debug messages written while loading the model.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

class CustomTaxiPredictor(Predictor):
    """Custom predictor that serves a joblib-serialized model.

    ``load`` downloads the model artifacts into ``<cwd>/model`` and deserializes
    ``model.joblib``; ``predict`` runs the model on the ``"instances"`` entry of
    the request and returns the result under ``"predictions"``.
    """

    def __init__(self):
        # Set by load(); defined here so the attribute always exists.
        self.model = None

    def load(self, artifacts_uri: str) -> None:
        """Download the model artifacts and load ``model.joblib``.

        Args:
            artifacts_uri: Location of the model artifacts, forwarded to
                ``prediction_utils.download_model_artifacts``.
        """
        logger.info(f"Starting predictor using {artifacts_uri}")
        origin_path = os.getcwd()
        model_path = f"{origin_path}/model"
        # exist_ok=True: load() can run more than once against the same folder
        # (several server workers, or a restart), which must not raise.
        os.makedirs(model_path, exist_ok=True)
        # The download helper is called from inside model_path so the artifacts
        # land there; always restore the working directory, even on failure.
        os.chdir(model_path)
        try:
            prediction_utils.download_model_artifacts(artifacts_uri)
        finally:
            os.chdir(origin_path)
        logger.debug('Start model loading...')
        # NOTE: joblib.load is pickle-based and can execute code embedded in the
        # file; only point artifacts_uri at storage you trust.
        self.model = joblib.load(f"{model_path}/model.joblib")
        logger.debug('Model loaded successfully')

    def predict(self, prediction_input):
        """Run the model on ``prediction_input["instances"]``.

        Args:
            prediction_input: Mapping with an ``"instances"`` entry holding the
                rows to score.

        Returns:
            ``{"predictions": [...]}`` with the model output as a list.

        Raises:
            Exception: Whatever the underlying model raises is logged with its
                traceback and re-raised, instead of being swallowed and then
                failing later on an undefined ``predictions`` variable.
        """
        start_time = time.time()
        instances = prediction_input["instances"]
        logger.info(f"--- input: {instances}")
        try:
            predictions = self.model.predict(instances)
        except Exception:
            logger.exception("Model prediction failed")
            raise
        logger.info(f"--- Result: {predictions}")
        logger.info("--- %s seconds ---" % (time.time() - start_time))
        # Array-like results (e.g. NumPy) are converted to native Python values
        # so the response can be JSON-serialized; other iterables are kept as-is.
        if hasattr(predictions, "tolist"):
            predictions = predictions.tolist()
        return {"predictions": list(predictions)}

In [None]:
from package.CustomTaxiPredictor import CustomTaxiPredictor
from google.cloud.aiplatform.prediction import LocalModel
# Target image URI for the custom predictor container (the image name must be lower case)
vai_serving_container_uri = "/".join(
    [f"{REGION}-docker.pkg.dev", PROJECT, REPO_NAME, "customtaxipredictor"]
)

# Build the serving image locally from the package folder on top of the base container.
# Make sure there is sufficient free disk space for the build.
cpr_build_options = dict(
    src_dir=PACKAGE_PATH,
    output_image_uri=vai_serving_container_uri,
    predictor=CustomTaxiPredictor,
    base_image=BASE_CONTAINER,
    requirements_path=f"{PACKAGE_PATH}/requirements.txt",
    #extra_packages=["deploy_package/custom_package.tar.gz"]
    no_cache=False,
)
local_model = LocalModel.build_cpr_model(**cpr_build_options)

In [None]:
import json
import sys
import logging
logging.basicConfig(level=logging.INFO, stream=sys.stderr)

# Start the serving container locally against the model artifacts.
# gpu_count=0 runs on CPU; set -1 if using GPU.
local_endpoint = local_model.deploy_to_local_endpoint(artifact_uri=MODEL_PATH, gpu_count=0)
try:
    local_endpoint.serve()
finally:
    # Always show the container log, on success and on failure, but let a
    # startup error propagate instead of hiding it behind a bare `except`.
    container = getattr(local_endpoint, "container", None)
    if container is not None:
        print(container.logs().decode("utf-8").strip())

In [None]:
# Single sample row sent to the local endpoint as a JSON request body
sample_instance = [
    485, 1.12, 0.0, "Chicago  Carriage  Cab  Corp",
    41.881, -87.633, 41.885, -87.643, 0, 2019, 4, 1, 1, "Mon", 1.094445752708995,
]
request = {"instances": [sample_instance]}

request_body = json.dumps(request)
predict_response = local_endpoint.predict(
    headers={"Content-Type": "application/json"},
    request=request_body,
)
# Decoded JSON response is the cell output
predict_response.json()

In [None]:
# Print everything the serving container has logged so far
container_log = local_endpoint.container.logs().decode("utf-8")
print(container_log.strip())

In [None]:
# Stop the local endpoint once local testing is finished
local_endpoint.stop()

In [None]:
# Push the locally built serving image to Artifact Registry so it can be used for deployment
local_model.push_image()

In [None]:
# Register the serving image and the model artifacts as a model in the Vertex AI Model Registry
from google.cloud import aiplatform
model = aiplatform.Model.upload(
    location=REGION,
    display_name="CustomTaxiPredictor",
    local_model=local_model,
    artifact_uri=MODEL_PATH,
    #parent_model = prev_model.resource_name,
    #is_default_version=True,
    serving_container_environment_variables={
        # Optional env var capping how many `uvicorn` workers run the model.
        # Passed as a string, since environment variable values are strings.
        # NOTE(review): the Dockerfile in this notebook sets this variable to 1;
        # confirm that 4 workers is intended for the deployed model.
        "VERTEX_CPR_MAX_WORKERS": "4",
    },
)

In [None]:
# Create the (dedicated) endpoint that will host the uploaded model
remote_endpoint = aiplatform.Endpoint.create(
    location=REGION,
    display_name="CustomTaxiPredictor test endpoint",
    dedicated_endpoint_enabled=True,
    #labels={"sample-key": "sample-value"},
)

In [None]:
# Deploy the uploaded model to the endpoint on a single fixed replica
# (min_replica_count == max_replica_count == 1), running as SERVICE_ACCOUNT.
# NOTE(review): this requests a GPU machine (g2-standard-4 + 1x NVIDIA_L4), while the
# Dockerfile in this notebook uses the CPU `python:3.8` base with the CUDA base
# commented out — confirm the GPU is actually needed.
remote_endpoint.deploy(
    model=model,
    machine_type="g2-standard-4",
    #tpu_topology=None,
    min_replica_count=1,
    max_replica_count=1,
    service_account=SERVICE_ACCOUNT,
    #traffic_percentage=50
    #traffic_split={'a':50, 'b':50}
    # GPU configuration
    accelerator_type="NVIDIA_L4",
    accelerator_count=1,
    #deploy_request_timeout=DEPLOY_TIMEOUT
)

In [None]:
# Same sample row as the local test, now sent to the deployed endpoint
sample_row = [
    485, 1.12, 0.0, "Chicago  Carriage  Cab  Corp",
    41.881, -87.633, 41.885, -87.643, 0, 2019, 4, 1, 1, "Mon", 1.094445752708995,
]
instances = [sample_row]

predict_response = remote_endpoint.predict(
    use_dedicated_endpoint=True,
    instances=instances,
)
# Predictions returned by the endpoint are the cell output
predict_response.predictions