In [108]:
! pip3 install --upgrade xgboost google-cloud-aiplatform --user -q --no-warn-script-location

## 1. Get Project ID and Create Bucket

\[This will issues a ServiceException if a bucket is already there with the name. Should omit the exception in that case\]

In [109]:
PROJECT_ID_DETAILS = !gcloud config get-value project
PROJECT_ID = PROJECT_ID_DETAILS[0]  # The project ID is item 0 in the list returned by the gcloud command
BUCKET=f"{PROJECT_ID}-buckt"
REGION="us-central1"
print(f"Project ID: {PROJECT_ID}")
print(f"Bucket Name: {BUCKET}")

!gsutil mb -l {REGION} gs://{BUCKET}

Project ID: quixotic-galaxy-439523-m3
Bucket Name: quixotic-galaxy-439523-m3-buckt
Creating gs://quixotic-galaxy-439523-m3-buckt/...
ServiceException: 409 A Cloud Storage bucket named 'quixotic-galaxy-439523-m3-buckt' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


## 2. Set Bucket URI and Other Directories

In [110]:
BUCKET_URI=f"gs://{BUCKET}"
APP_NAME="boston-housing"
APPLICATION_DIR = "hypo_opt"
TRAINER_DIR = f"{APPLICATION_DIR}/trainer"

!mkdir -p $APPLICATION_DIR
!mkdir -p $TRAINER_DIR

## 3. Download the Dataset, Clean it and Save in the Bucket

In [111]:
import pandas as pd
import numpy as np

data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]
target = target.reshape((len(target)),1)

column_names =  ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"]
data_df = pd.DataFrame(np.concatenate((data, target), axis=1), columns=column_names)

file_path = 'boston_housing.csv'
data_df.to_csv(file_path, index=False)
!gsutil cp boston_housing.csv $BUCKET_URI/boston_housing.csv

Copying file://boston_housing.csv [Content-Type=text/csv]...
/ [1 files][ 38.2 KiB/ 38.2 KiB]                                                
Operation completed over 1 objects/38.2 KiB.                                     


## 4. Containerize the Training Application Code

### 4.1 Initialize AI Platform

In [112]:
import google.cloud.aiplatform as aiplatform
from google.cloud.aiplatform import hyperparameter_tuning as hpt

# Initialize the AI Platform client
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)

### 4.2 Set Pre-Built Containers

In [113]:
TRAIN_VERSION = "xgboost-cpu.1-1"
DEPLOY_VERSION = "xgboost-cpu.1-1"

TRAIN_IMAGE = "{}-docker.pkg.dev/vertex-ai/training/{}:latest".format(
    REGION.split("-")[0], TRAIN_VERSION
)
DEPLOY_IMAGE = "{}-docker.pkg.dev/vertex-ai/prediction/{}:latest".format(
    REGION.split("-")[0], DEPLOY_VERSION
)

### 4.3 Create a Folder Structure as Python Package

In [114]:
# Make folder for Python training script
! rm -rf custom
! mkdir custom

# Add package information
! touch custom/README.md

setup_cfg = "[egg_info]\n\ntag_build =\n\ntag_date = 0"
! echo "$setup_cfg" > custom/setup.cfg

setup_py = "import setuptools\n\nsetuptools.setup(\n\n    install_requires=[\n\n        'cloudml-hypertune', 'gcsfs' \n\n    ],\n\n    packages=setuptools.find_packages())"
! echo "$setup_py" > custom/setup.py

pkg_info = "Metadata-Version: 1.0\n\nName: Iris tabular classification\n\nVersion: 0.0.0\n\nSummary: Demostration training script\n\nHome-page: www.google.com\n\nAuthor: Google\n\nAuthor-email: aferlitsch@google.com\n\nLicense: Public\n\nDescription: Demo\n\nPlatform: Vertex"
! echo "$pkg_info" > custom/PKG-INFO

# Make the training subfolder
! mkdir custom/trainer
! touch custom/trainer/__init__.py

### 4.4 Create Training Script

In [115]:
%%writefile custom/trainer/task.py

import argparse
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from hypertune import HyperTune
import os
import logging

data_location="gs://quixotic-galaxy-439523-m3-buckt/boston_housing.csv"

logging.getLogger().setLevel(logging.INFO)

def train_model(data, n_estimators, max_depth, learning_rate, subsample):
    
    y = data.pop("MEDV")
    X = data
    
    # Split the data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=0)

    params = {
        'objective': 'reg:squarederror',
        'n_estimators': n_estimators,
        'max_depth': max_depth,
        'learning_rate': learning_rate,
        'subsample': subsample
    }

    model = xgb.XGBRegressor(**params)
    model.fit(X_train, y_train, verbose=False)
    
    mmse_val = mean_squared_error(y_val, model.predict(X_val))

    hpt = HyperTune()
    #hpt.report_hyperparameter_tuning_metric(
    #    hyperparameter_metric_tag='mmse_val',
    #    metric_value=mmse_val,
    #    global_step=1000)
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag='mmse_val',
        metric_value=mmse_val)

    return model

def get_args():
    parser = argparse.ArgumentParser(description='XGBoost Hyperparameter Tuning')
    parser.add_argument('--n_estimators', type=int, default=10)
    parser.add_argument('--max_depth', type=int, default=5)
    parser.add_argument('--learning_rate', type=float, default=0.1)
    parser.add_argument('--subsample', type=float, default=0.1)
    args = parser.parse_args()
    return args

def main():
    args = get_args()
    data = pd.read_csv(data_location)
    model = train_model(data=data, 
                        n_estimators=args.n_estimators, 
                        max_depth=args.max_depth,
                        learning_rate=args.learning_rate, 
                        subsample=args.subsample)
    
    model_dir = os.getenv('AIP_MODEL_DIR')
    # GCSFuse conversion
    gs_prefix = 'gs://'
    gcsfuse_prefix = '/gcs/'
    if model_dir.startswith(gs_prefix):
        model_dir = model_dir.replace(gs_prefix, gcsfuse_prefix)
        dirpath = os.path.split(model_dir)[0]
        if not os.path.isdir(dirpath):
            os.makedirs(dirpath)
    
    gcs_model_path = os.path.join(model_dir, 'model.bst')
    model.save_model(gcs_model_path)
    logging.info(f"Saved model artifacts to {gcs_model_path}")
    

if __name__ == "__main__":
    main()


Writing custom/trainer/task.py


### 4.5 Store Training Script on Cloud Storage Bucket

Compress the whole training folder as a tar ball and then store it in a Cloud Storage bucket.

In [116]:
! rm -f custom.tar custom.tar.gz
! tar cvf custom.tar custom
! gzip custom.tar
! gsutil cp custom.tar.gz $BUCKET_URI/trainer_iris.tar.gz

custom/
custom/PKG-INFO
custom/setup.py
custom/trainer/
custom/trainer/__init__.py
custom/trainer/task.py
custom/README.md
custom/setup.cfg
Copying file://custom.tar.gz [Content-Type=application/x-tar]...
/ [1 files][  1.5 KiB/  1.5 KiB]                                                
Operation completed over 1 objects/1.5 KiB.                                      


## 5. Hyperparameter Tuining Job Setup

### 5.1 Set Worker Pool Specs

In [117]:
worker_pool_spec = [
    {
        "replica_count": 1,
        "machine_spec": {
            "machine_type": "n1-standard-4",
        },
        "python_package_spec": {
            "executor_image_uri": TRAIN_IMAGE,
            "package_uris": [BUCKET_URI + "/trainer_iris.tar.gz"],
            "python_module": "trainer.task",
        },
    }
]

In [118]:
# Define custom job
MODEL_DIR = f"{BUCKET_URI}/aiplatform-custom-job"

custom_job = aiplatform.CustomJob(
    display_name="xgboost_train",
    worker_pool_specs=worker_pool_spec,
    base_output_dir=MODEL_DIR
)

In [119]:
# Define the hyperparameter tuning spec
hpt_job = aiplatform.HyperparameterTuningJob(
    display_name="xgboost_hpt",
    custom_job=custom_job,
    metric_spec={
        "mmse_val": "minimize",
    },
    parameter_spec={
        "n_estimators": aiplatform.hyperparameter_tuning.IntegerParameterSpec(min=3, max=4, scale='linear'),
        "max_depth": aiplatform.hyperparameter_tuning.IntegerParameterSpec(min=3, max=4, scale='linear'),
        "learning_rate": aiplatform.hyperparameter_tuning.DoubleParameterSpec(min=0.1, max=0.2, scale='log'),
        "subsample": aiplatform.hyperparameter_tuning.DoubleParameterSpec(min=0.1, max=0.2, scale='log'),
    },
    max_trial_count=5, #This was to limit the time taken for tuning and save free credit. Should be a larger value ideally
    parallel_trial_count=5,
)

In [None]:
hpt_job.run()

Creating HyperparameterTuningJob
HyperparameterTuningJob created. Resource name: projects/442313554841/locations/us-central1/hyperparameterTuningJobs/582446871562158080
To use this HyperparameterTuningJob in another session:
hpt_job = aiplatform.HyperparameterTuningJob.get('projects/442313554841/locations/us-central1/hyperparameterTuningJobs/582446871562158080')
View HyperparameterTuningJob:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/582446871562158080?project=442313554841
HyperparameterTuningJob projects/442313554841/locations/us-central1/hyperparameterTuningJobs/582446871562158080 current state:
JobState.JOB_STATE_PENDING
HyperparameterTuningJob projects/442313554841/locations/us-central1/hyperparameterTuningJobs/582446871562158080 current state:
JobState.JOB_STATE_RUNNING
HyperparameterTuningJob projects/442313554841/locations/us-central1/hyperparameterTuningJobs/582446871562158080 current state:
JobState.JOB_STATE_RUNNING
HyperparameterTuningJob pro

In [None]:
results = np.array([trail.final_measurement.metrics[0].value for trail in hpt_job.trials])
id_min = results.argmin()
BEST_MODEL_DIR = MODEL_DIR + "/" + hpt_job.trials[id_min].id + "/model"
! gsutil ls {BEST_MODEL_DIR}

gs://quixotic-galaxy-439523-m3-buckt/aiplatform-custom-job/5/model/
gs://quixotic-galaxy-439523-m3-buckt/aiplatform-custom-job/5/model/model.bst
gs://quixotic-galaxy-439523-m3-buckt/aiplatform-custom-job/5/model/model.pkl


In [None]:
model_v1 = aiplatform.Model.upload(
    display_name="xgboost_best_model",
    artifact_uri=BEST_MODEL_DIR,
    serving_container_image_uri=DEPLOY_IMAGE,
    is_default_version=True
)

print(model_v1)

Creating Model
Create Model backing LRO: projects/442313554841/locations/us-central1/models/5726121317527191552/operations/2330540960951304192


FailedPrecondition: 400 Model directory gs://quixotic-galaxy-439523-m3-buckt/aiplatform-custom-job/5/model is expected to contain exactly one of: [model.pkl, model.joblib, model.bst].

In [None]:

endpoint = aiplatform.Endpoint.create(
    display_name="xgboost_model_endpoint",
    project=PROJECT_ID,
    location=REGION,
    labels={"your_key": "your_value"},
)

print(endpoint)

In [None]:
response = endpoint.deploy(
    model=model_v1,
    deployed_model_display_name="example_",
    machine_type="n1-standard-4",
)

print(endpoint)