## Set Variables

In [21]:
# Notebook-wide settings. Replace the values marked "Change it" with your own.
PROJECT_ID = "jchavezar-demo"  # Change it
REGION = "us-central1"

# Container images built further down (training job and prediction server).
TRAIN_IMAGE_URI = "gcr.io/{}/custom_train:v1".format(PROJECT_ID)
PREDICT_IMAGE_URI = "gcr.io/{}/custom_predict:v1".format(PROJECT_ID)

# Cloud Storage locations: pipeline root and the bucket passed to the
# containers as AIP_STORAGE_URI.
PIPELINE_ROOT_PATH = "gs://vtx-root-path"  # Change it
AIP_STORAGE_URI = "gs://vtx-artifacts"

## Create Folder Structure

In [14]:
# Start from a clean tree, then recreate the train/ and prediction/ packages.
!rm -rf custom_train_job
!mkdir -p custom_train_job/train custom_train_job/prediction
!touch custom_train_job/train/__init__.py custom_train_job/prediction/__init__.py

## Build the Model Training Code

### Preprocessing for Training and Serving

In [15]:
%%writefile custom_train_job/train/preprocess.py

import os
import pandas as pd
import sys

# Base URI under which the training statistics are written and read back
# (as '<data_uri>/mpg/stats.csv'). Raises KeyError at import time if the
# AIP_STORAGE_URI environment variable is not set.
data_uri = os.environ['AIP_STORAGE_URI']

## Data cleaning and normalization; exports the training statistics.

def train_pre_process(dataset, stats_path=None):
    """Clean, one-hot encode, split and normalize the raw auto-mpg frame.

    Rows with missing values are dropped, the numeric 'Origin' code is turned
    into 'USA' / 'Europe' / 'Japan' indicator columns, the frame is split
    80/20 (fixed seed), and both splits are normalized with the *training*
    mean and standard deviation. The training statistics are written to CSV
    so that serving can apply the same normalization.

    Args:
        dataset: DataFrame holding an 'MPG' label column, an 'Origin' column
            coded 1 (USA), 2 (Europe) or 3 (Japan), and numeric features.
        stats_path: Destination of the statistics CSV. Defaults to
            f'{data_uri}/mpg/stats.csv'.

    Returns:
        Tuple (normed_train_data, train_labels, normed_test_data, test_labels).
    """
    if stats_path is None:
        stats_path = f'{data_uri}/mpg/stats.csv'

    cleaned = dataset.dropna()
    # assign() returns a new frame, so the caller's data is never written to.
    cleaned = cleaned.assign(
        Origin=cleaned['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})
    )
    # dtype=float: recent pandas emits bool indicator columns, which
    # describe() ignores; the one-hot columns would then have no statistics
    # and would normalize to NaN.
    encoded = pd.get_dummies(cleaned, prefix='', prefix_sep='', dtype=float)

    train_dataset = encoded.sample(frac=0.8, random_state=0)
    test_dataset = encoded.drop(train_dataset.index)

    train_stats = train_dataset.describe()
    train_stats.pop('MPG')  # the label is not normalized
    train_stats = train_stats.transpose()
    train_stats.to_csv(stats_path)

    train_labels = train_dataset.pop('MPG')
    test_labels = test_dataset.pop('MPG')

    def norm(x):
        return (x - train_stats['mean']) / train_stats['std']

    normed_train_data = norm(train_dataset)
    normed_test_data = norm(test_dataset)

    return normed_train_data, train_labels, normed_test_data, test_labels

## Reuse the training statistics so serving data gets the same normalization.

def pred_data_process(data: list, train_stats=None):
    """Turn raw prediction input into a normalized feature frame.

    Args:
        data: Either one instance as a flat list of seven values in the order
            Cylinders, Displacement, Horsepower, Weight, Acceleration,
            Model Year, Origin; or a list of such instances.
        train_stats: Optional DataFrame indexed by feature name with 'mean'
            and 'std' columns. When omitted, the statistics are read from
            f'{data_uri}/mpg/stats.csv'. Passing it lets a caller load the
            statistics once instead of on every call.

    Returns:
        DataFrame with the six numeric features plus 'Europe', 'Japan' and
        'USA' indicator columns, normalized with the training statistics.
        Rows containing missing values are dropped.

    Raises:
        ValueError: If an instance has an Origin code other than 1, 2 or 3.
    """
    column_names = ['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration', 'Model Year', 'Origin']
    # Insertion order matches the indicator columns built by the training code.
    origin_codes = {'Europe': 2, 'Japan': 3, 'USA': 1}

    # A nested list is a batch; a flat list is a single instance.
    is_batch = len(data) > 0 and isinstance(data[0], (list, tuple))
    rows = data if is_batch else [data]
    dataset = pd.DataFrame(rows, columns=column_names).dropna().copy()

    origin = dataset.pop('Origin')
    unknown = origin[~origin.isin(list(origin_codes.values()))]
    if not unknown.empty:
        raise ValueError(
            f'Unknown Origin code(s) {sorted(set(unknown.tolist()))}; expected 1 (USA), 2 (Europe) or 3 (Japan)'
        )
    for name, code in origin_codes.items():
        dataset[name] = (origin == code).astype(float)

    ## Train stats
    if train_stats is None:
        train_stats = pd.read_csv(f'{data_uri}/mpg/stats.csv', index_col=[0])

    return (dataset - train_stats['mean']) / train_stats['std']

Writing custom_train_job/train/preprocess.py


In [29]:
# The serving container needs the same preprocessing module as training.
!cp custom_train_job/train/preprocess.py custom_train_job/prediction/

### Training Code

In [30]:
%%writefile custom_train_job/train/train.py

import os
import sys
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Base URI under which the trained model is saved (as '<model_uri>/mpg/model').
# Raises KeyError at import time if AIP_STORAGE_URI is not set.
model_uri = os.environ['AIP_STORAGE_URI']

def build_model(train_data):
    """Build and compile the regression network.

    The network has two 64-unit ReLU hidden layers and a single linear output.
    Its input width is the number of keys (columns) in ``train_data``.

    Args:
        train_data: Feature container exposing ``keys()``.

    Returns:
        A compiled Keras Sequential model (MSE loss, RMSprop at 0.001,
        reporting MAE and MSE).
    """
    n_features = len(train_data.keys())

    network = keras.Sequential()
    network.add(layers.Dense(64, activation='relu', input_shape=[n_features]))
    network.add(layers.Dense(64, activation='relu'))
    network.add(layers.Dense(1))

    network.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.RMSprop(0.001),
        metrics=['mae', 'mse'],
    )
    return network

def train_model(train_data, train_labels, epochs: int = 1000):
    """Train the MPG regression model and save it under ``model_uri``.

    Args:
        train_data: Normalized training features.
        train_labels: Target values aligned with ``train_data``.
        epochs: Upper bound on training epochs; early stopping may end the
            run sooner.

    Returns:
        The trained Keras model. It is also saved to f'{model_uri}/mpg/model'.
    """
    print('[INFO] ------ Building Model Layers', file=sys.stderr)
    model = build_model(train_data)

    # Stop once val_loss has not improved for 10 consecutive epochs.
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

    print('[INFO] ------ Iterations / Training', file=sys.stderr)
    # 20% of the training rows are held out to compute val_loss.
    model.fit(
        train_data,
        train_labels,
        epochs=epochs,
        validation_split=0.2,
        callbacks=[early_stop],
    )

    print('[INFO] ------ Saving Model', file=sys.stderr)
    model.save(f'{model_uri}/mpg/model')

    return model

Overwriting custom_train_job/train/train.py


In [18]:
%%writefile custom_train_job/train/main.py

import sys
import preprocess
import train
import pandas as pd
from tensorflow import keras

# Download the raw auto-mpg file and load it with explicit column names.
DATA_URL = "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]
dataset_path = keras.utils.get_file("auto-mpg.data", DATA_URL)
dataset = pd.read_csv(
    dataset_path,
    names=column_names,
    na_values="?",      # '?' entries are read as NaN
    comment='\t',       # everything after a tab is ignored
    sep=" ",
    skipinitialspace=True,
)

## Clean, Normalize and Split Data

print('[INFO] ------ Preparing Data', file=sys.stderr)
splits = preprocess.train_pre_process(dataset)
train_data, train_labels, test_data, test_labels = splits

## Train model and save it in Google Cloud Storage

print('[INFO] ------ Training Model', file=sys.stderr)
train.train_model(train_data, train_labels)

Writing custom_train_job/train/main.py


### Container Image

In [19]:
%%writefile custom_train_job/train/Dockerfile

# Pin the interpreter to the same Python version as the serving image
# (tiangolo/uvicorn-gunicorn-fastapi:python3.7). A floating "latest" tag lets
# pip resolve newer pandas/TensorFlow releases for training than the ones the
# prediction container can install and load the saved model with.
FROM python:3.7

WORKDIR /trainer

# Dependencies first, without keeping pip's download cache in the image.
RUN python -m pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir pandas gcsfs tensorflow

# Build context (main.py, train.py, preprocess.py) goes to /trainer.
COPY . /trainer

CMD ["python", "main.py"]

Writing custom_train_job/train/Dockerfile


In [20]:
# Build the training image with Cloud Build and tag it as TRAIN_IMAGE_URI.
!gcloud builds submit --tag {TRAIN_IMAGE_URI} custom_train_job/train

Creating temporary tarball archive of 5 file(s) totalling 4.1 KiB before compression.
Uploading tarball of [custom_train_job/train/.] to [gs://jchavezar-demo_cloudbuild/source/1662558550.233355-aff16ca48e1e41eeb67c1f2824ed0651.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/jchavezar-demo/locations/global/builds/7bad8ad1-ab11-4457-a04a-9e967208da40].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/7bad8ad1-ab11-4457-a04a-9e967208da40?project=569083142710 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "7bad8ad1-ab11-4457-a04a-9e967208da40"

FETCHSOURCE
Fetching storage object: gs://jchavezar-demo_cloudbuild/source/1662558550.233355-aff16ca48e1e41eeb67c1f2824ed0651.tgz#1662558550877089
Copying gs://jchavezar-demo_cloudbuild/source/1662558550.233355-aff16ca48e1e41eeb67c1f2824ed0651.tgz#1662558550877089...
/ [1 files][  1.7 KiB/  1.7 KiB]                                                
Operation com

## Create Serving with Transformation

In [31]:
%%writefile custom_train_job/prediction/main.py

from fastapi import Request, FastAPI
import tensorflow as tf
import json
import os
import preprocess
import sys

app = FastAPI()

# The model is loaded once at import time from '<AIP_STORAGE_URI>/mpg/model';
# a missing AIP_STORAGE_URI raises KeyError before the server starts.
model_uri = os.environ['AIP_STORAGE_URI']
print('[INFO] ------ {}'.format(model_uri), file=sys.stderr)
model_path = '{}/mpg/model'.format(model_uri)
model = tf.keras.models.load_model(model_path)

@app.get('/')
def get_root():
    """Return a static welcome message for the API root."""
    welcome = 'Welcome mpg API: miles per gallon prediction'
    return {'message': welcome}

@app.get('/health_check')
def health():
    """Health probe: responds with the literal 200 as the body."""
    status = 200
    return status

# Route for the prediction handler: taken from AIP_PREDICT_ROUTE when that
# variable is set, otherwise '/predict'.
method = os.environ.get('AIP_PREDICT_ROUTE', '/predict')

@app.post(method)
async def predict(request: Request):
    """Return MPG predictions for the instances in the JSON request body.

    The body must contain an "instances" key holding either a single instance
    (a flat list of seven feature values) or a list of such instances. Each
    instance is preprocessed and scored on its own, so this handler works
    with a preprocessing function that takes one instance per call.

    Returns:
        {"predictions": [...]} with one model output per scored instance.
    """
    print("----------------- PREDICTING -----------------")
    body = await request.json()
    instances = body["instances"]

    # A nested list is a batch of instances; a flat list is one instance.
    if instances and isinstance(instances[0], (list, tuple)):
        rows = instances
    else:
        rows = [instances]

    response = []
    for row in rows:
        norm_data = preprocess.pred_data_process(row)
        response.extend(model.predict(norm_data).tolist())

    print("----------------- OUTPUTS -----------------")
    return {"predictions": response}

Overwriting custom_train_job/prediction/main.py


## Container Image for Serving/Prediction

In [32]:
%%writefile custom_train_job/prediction/Dockerfile

FROM tiangolo/uvicorn-gunicorn-fastapi:python3.7

WORKDIR /app

# Python dependencies of the prediction server.
RUN python -m pip install --upgrade pip
RUN pip install pandas gcsfs tensorflow

# Build context (main.py, preprocess.py) goes to /app.
COPY . /app

# The server listens on 8080.
EXPOSE 8080
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

Overwriting custom_train_job/prediction/Dockerfile


In [33]:
# Build the serving image with Cloud Build and tag it as PREDICT_IMAGE_URI.
!gcloud builds submit --tag {PREDICT_IMAGE_URI} custom_train_job/prediction

Creating temporary tarball archive of 4 file(s) totalling 3.1 KiB before compression.
Uploading tarball of [custom_train_job/prediction/.] to [gs://jchavezar-demo_cloudbuild/source/1662564407.842171-434467847fe845f5bad17af813209904.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/jchavezar-demo/locations/global/builds/219ce390-6e0f-4654-aa5c-db3cb1e8502d].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/219ce390-6e0f-4654-aa5c-db3cb1e8502d?project=569083142710 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "219ce390-6e0f-4654-aa5c-db3cb1e8502d"

FETCHSOURCE
Fetching storage object: gs://jchavezar-demo_cloudbuild/source/1662564407.842171-434467847fe845f5bad17af813209904.tgz#1662564408309611
Copying gs://jchavezar-demo_cloudbuild/source/1662564407.842171-434467847fe845f5bad17af813209904.tgz#1662564408309611...
/ [1 files][  1.4 KiB/  1.4 KiB]                                                
Operatio

## Create Pipeline

In [39]:
## Training Worker Specs

# Environment handed to the training container: the bucket it reads from and
# writes to.
_train_env = [
    {"name": "AIP_STORAGE_URI", "value": AIP_STORAGE_URI},
]

# A single n1-standard-4 replica running the training image.
worker_pool_specs = [
    {
        "machine_spec": {"machine_type": "n1-standard-4"},
        "replica_count": "1",
        "container_spec": {
            "image_uri": TRAIN_IMAGE_URI,
            "env": _train_env,
        },
    }
]

In [40]:
from kfp.v2.dsl import pipeline
from kfp.v2.components import importer_node
from google_cloud_pipeline_components import aiplatform as gcc
from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.v1.custom_job import CustomTrainingJobOp

@pipeline(name='custom-train')
def pipeline(
    project_id: str
):
    """Train the model, register it with its serving container, and deploy it.

    Steps: run the custom training job, import the artifact location as an
    unmanaged container model, upload it as a model, create an endpoint and
    deploy the model to that endpoint.

    Args:
        project_id: Project in which the training job, the model and the
            endpoint are created.
    """
    train_job = CustomTrainingJobOp(
        project=project_id,
        display_name='custom_train',
        worker_pool_specs=worker_pool_specs
    )
    # The importer only points at the artifact location, so it has to wait
    # for training explicitly; there is no data dependency between the two.
    import_unmanaged_model_op = importer_node.importer(
            artifact_uri=AIP_STORAGE_URI,
            artifact_class=artifact_types.UnmanagedContainerModel,
            metadata={
                "containerSpec": {
                    "imageUri": PREDICT_IMAGE_URI,
                    "env": [
                        {
                            "name": "PROJECT_ID",
                            "value": PROJECT_ID},
                    ],
                    "predictRoute": "/predict",
                    "healthRoute": "/health_check",
                    "ports": [
                        {
                            "containerPort": 8080
                        }
                    ]
                },
            },
    ).after(train_job)
    # Use the pipeline parameter (not the notebook global) so every resource
    # lands in the project the run was submitted for.
    custom_model_upload_job = gcc.ModelUploadOp(
        project=project_id,
        display_name="customjob-model",
        unmanaged_container_model=import_unmanaged_model_op.outputs["artifact"],
        ).after(import_unmanaged_model_op)
    endpoint_create_job = gcc.EndpointCreateOp(
        project=project_id,
        display_name="cutomjob-endpoint",
    )

    # Caching is disabled for the deploy step so it runs on every pipeline run.
    custom_model_deploy_job = (gcc.ModelDeployOp(
        model=custom_model_upload_job.outputs["model"],
        endpoint=endpoint_create_job.outputs["endpoint"],
        deployed_model_display_name="cutomjob-deploy",
        traffic_split={"0":"100"},
        dedicated_resources_machine_type="n1-standard-2",
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1
        )).set_caching_options(False)

In [41]:
from kfp.v2 import compiler
import warnings
# Suppress all Python warnings from this point on in the session.
warnings.filterwarnings('ignore')

# Compile the pipeline function into a JSON job spec on local disk.
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(
    pipeline_func=pipeline,
    package_path='custom_train.json',
)

In [42]:
import google.cloud.aiplatform as aip

# Submit the compiled spec. Project and location are passed explicitly so the
# run uses the values configured at the top of the notebook rather than
# whatever defaults the SDK picks up from the environment.
job = aip.PipelineJob(
    display_name="custom_train",
    template_path="custom_train.json",
    pipeline_root=PIPELINE_ROOT_PATH,
    parameter_values={
        "project_id": PROJECT_ID
    },
    project=PROJECT_ID,
    location=REGION,
)

job.submit()

Creating PipelineJob
PipelineJob created. Resource name: projects/569083142710/locations/us-central1/pipelineJobs/custom-train-20220907121011
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/569083142710/locations/us-central1/pipelineJobs/custom-train-20220907121011')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/custom-train-20220907121011?project=569083142710
