# Basic Pipelines

<center><img src="images/custom-job-pytorch[ro]-4.png"/></center>

## Pipeline Components


### Create Code/Folder Structure and Set Environment

In [25]:
# Settings shared by the image builds, the pipeline definition, and the run cells.
PROJECT_ID = 'jchavezar-demo'
STAGING_BUCKET = 'gs://vtx-staging'

# Both container images live in the project's own registry, so their URIs are
# derived from PROJECT_ID instead of repeating the project name.
_IMAGE_REPO = f'gcr.io/{PROJECT_ID}'
TRAIN_IMAGE = f'{_IMAGE_REPO}/pytorch-custom-synthetic-pipe-train:v1'
PREDICTION_IMAGE = f'{_IMAGE_REPO}/pytorch-custom-synthetic-pipe-predict:v1'

In [26]:
from google.cloud import aiplatform

#### Training Code

In [27]:
# Start from an empty build context for the training image.
!rm -rf training
!mkdir training

In [28]:
%%writefile training/train.py
#%%
import pandas as pd
from pytorch_tabular import TabularModel
from pytorch_tabular.models import CategoryEmbeddingModelConfig
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig
from pytorch_tabular.models.common.heads import LinearHeadConfig

train = pd.read_csv('gs://vtx-datasets-public/pytorch_tabular/synthetic/train.csv')
test = pd.read_csv('gs://vtx-datasets-public/pytorch_tabular/synthetic/test.csv')
val = pd.read_csv('gs://vtx-datasets-public/pytorch_tabular/synthetic/val.csv')

cat_col_names = [col for col in train.columns if 'cat' in col]
num_col_names = [col for col in train.columns if 'num' in col]

data_config = DataConfig(
    target=['target'], #target should always be a list. Multi-targets are only supported for regression. Multi-Task Classification is not implemented
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names,
)
trainer_config = TrainerConfig(
    auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate
    batch_size=1024,
    max_epochs=100,
    accelerator="auto", # can be 'cpu','gpu', 'tpu', or 'ipu' 
)
optimizer_config = OptimizerConfig()


head_config = LinearHeadConfig(
    layers="", # No additional layer in head, just a mapping layer to output_dim
    dropout=0.1,
    initialization="kaiming"
).__dict__ # Convert to dict to pass to the model config (OmegaConf doesn't accept objects)

model_config = CategoryEmbeddingModelConfig(
    task="classification",
    layers="32-16", # Number of nodes in each layer
    activation="LeakyReLU", # Activation between each layers
    dropout=0.1,
    initialization="kaiming",
    head = "LinearHead", #Linear Head
    head_config = head_config, # Linear Head Config
    learning_rate = 1e-3
)

tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)

tabular_model.fit(train=train, validation=val)
tabular_model.save_model('/gcs/vtx-models/pytorch/tabular_random')

Writing training/train.py


In [29]:
%%writefile training/Dockerfile
# Training image: PyTorch base image plus pytorch_tabular and gcsfs
# (train.py reads its CSV inputs from gs:// paths with pandas).
FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel

# Install dependencies before copying the source so that editing train.py does
# not invalidate the slow dependency layer. The extras spec is quoted so the
# shell cannot treat the brackets as a glob pattern.
RUN pip install --no-cache-dir "pytorch_tabular[extra]" gcsfs

# Copy the build context (train.py) into the image's working directory.
COPY . .

ENTRYPOINT ["python", "train.py"]

Writing training/Dockerfile


In [30]:
# Build the training image with Cloud Build and tag it as TRAIN_IMAGE.
!gcloud builds submit --tag {TRAIN_IMAGE} training/.

Creating temporary tarball archive of 2 file(s) totalling 2.2 KiB before compression.
Uploading tarball of [training/.] to [gs://jchavezar-demo_cloudbuild/source/1680027737.362494-a32f0e49889c4075b5043a62268dbb5d.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/jchavezar-demo/locations/global/builds/be671ab4-18c1-47a6-8716-581c9699c382].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/be671ab4-18c1-47a6-8716-581c9699c382?project=569083142710 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "be671ab4-18c1-47a6-8716-581c9699c382"

FETCHSOURCE
Fetching storage object: gs://jchavezar-demo_cloudbuild/source/1680027737.362494-a32f0e49889c4075b5043a62268dbb5d.tgz#1680027737566676
Copying gs://jchavezar-demo_cloudbuild/source/1680027737.362494-a32f0e49889c4075b5043a62268dbb5d.tgz#1680027737566676...
/ [1 files][  1.2 KiB/  1.2 KiB]                                                
Operation completed over 1 

#### Prediction Code

In [31]:
# Recreate an empty build context for the serving image; the app code goes in prediction/app.
!rm -rf prediction
!mkdir -p prediction/app

In [32]:
%%writefile prediction/app/main.py

#%%
import json
import os
import pandas as pd
from fastapi import Request, FastAPI
from pytorch_tabular import TabularModel
from starlette.responses import JSONResponse

app = FastAPI()
#columns = pd.read_csv('gs://vtx-datasets-public/pytorch_tabular/synthetic/train.csv', nrows=0).iloc[:,:-1].columns.to_list()
loaded_model = TabularModel.load_from_checkpoint("tabular_random")
#%%
@app.get('/health_check')
def health():
    return 200
if os.environ.get('AIP_PREDICT_ROUTE') is not None:
    method = os.environ['AIP_PREDICT_ROUTE']
else:
    method = '/predict'

@app.post(method)
async def predict(request: Request):
    print("----------------- PREDICTING -----------------")
    body = await request.json()
    instances = body["instances"]
    output = []
    for i in instances:
        output.append(float(loaded_model.predict(pd.DataFrame.from_dict(i))["prediction"][0]))
    print(output)
    print("----------------- OUTPUTS -----------------")
    #return {
    #    "predictions": [{"probability": response}]
    #    }
    return JSONResponse({"predictions": output})

Writing prediction/app/main.py


In [33]:
%%writefile prediction/Dockerfile

# Serving image: PyTorch base image plus pytorch_tabular and the FastAPI/uvicorn stack.
FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel

WORKDIR /app

# Install dependencies before copying the app so that changes to main.py or the
# bundled model files do not invalidate the slow dependency layer. The extras
# spec is quoted so the shell cannot treat the brackets as a glob pattern.
RUN pip install --no-cache-dir "pytorch_tabular[extra]" uvicorn fastapi gcsfs

# app/ holds main.py and the tabular_random model directory that main.py loads
# relative to the working directory.
COPY app /app

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

Writing prediction/Dockerfile


In [34]:
# Bundle the saved model into the serving image's build context. -m copies the
# files in parallel, as gsutil itself recommends for multi-file copies.
# NOTE(review): this needs a model already present at this location; train.py
# writes it, so on a fresh project a training run has to finish first.
!gsutil -m cp -r gs://vtx-models/pytorch/tabular_random prediction/app/

Copying gs://vtx-models/pytorch/tabular_random/callbacks.sav...
Copying gs://vtx-models/pytorch/tabular_random/config.yml...                    
Copying gs://vtx-models/pytorch/tabular_random/custom_params.sav...             
/ [3 files][888.7 KiB/888.7 KiB]                                                
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying gs://vtx-models/pytorch/tabular_random/datamodule.sav...
Copying gs://vtx-models/pytorch/tabular_random/model.ckpt...                    
/ [5 files][  2.1 MiB/  2.1 MiB]                                                
Operation completed over 5 objects/2.1 MiB.                                      


In [35]:
# Build the serving image with Cloud Build and tag it as PREDICTION_IMAGE.
!gcloud builds submit --tag {PREDICTION_IMAGE} prediction/.

Creating temporary tarball archive of 7 file(s) totalling 2.1 MiB before compression.
Uploading tarball of [prediction/.] to [gs://jchavezar-demo_cloudbuild/source/1680028402.103959-82fd34c892994ae0bd13a46a985b7731.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/jchavezar-demo/locations/global/builds/eee1ea27-78d5-48db-9592-5a9a145303b5].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/eee1ea27-78d5-48db-9592-5a9a145303b5?project=569083142710 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "eee1ea27-78d5-48db-9592-5a9a145303b5"

FETCHSOURCE
Fetching storage object: gs://jchavezar-demo_cloudbuild/source/1680028402.103959-82fd34c892994ae0bd13a46a985b7731.tgz#1680028402944108
Copying gs://jchavezar-demo_cloudbuild/source/1680028402.103959-82fd34c892994ae0bd13a46a985b7731.tgz#1680028402944108...
/ [1 files][  1.4 MiB/  1.4 MiB]                                                
Operation completed over 

In [36]:
# Both images are built; remove the local build contexts.
!rm -rf training prediction

## Creating Pipelines

### Explainable AI

In [38]:
# Sampled Shapley attribution configured with a path_count of 10.
PARAMETERS = {
    "sampled_shapley_attribution": {
        "path_count": 10,
    },
}
parameters = aiplatform.explain.ExplanationParameters(PARAMETERS)

# Feature columns declared as explanation inputs, in dataset order.
_EXPLAINED_FEATURES = (
    "num_col_0", "num_col_1",
    "cat_col_2", "cat_col_3", "cat_col_4",
    "num_col_5", "num_col_6", "num_col_7", "num_col_8", "num_col_9",
    "num_col_10", "num_col_11", "num_col_12", "num_col_13", "num_col_14",
    "num_col_15", "num_col_16", "num_col_17",
    "cat_col_18",
    "num_col_19",
)

# Every input, and the single "probability" output, uses an empty metadata entry.
EXPLANATION_METADATA = aiplatform.explain.ExplanationMetadata(
    inputs={feature: {} for feature in _EXPLAINED_FEATURES},
    outputs={"probability": {}},
)

In [39]:
from google_cloud_pipeline_components.v1 import custom_job, model, endpoint
from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.aiplatform import ModelUploadOp
from kfp.dsl import pipeline, importer
from kfp import compiler

## Worker pool spec for training: one n1-standard-4 replica with a single
## T4 accelerator, running the training container image.
_training_machine_spec = {
    "machine_type": "n1-standard-4",
    "accelerator_type": "NVIDIA_TESLA_T4",
    "accelerator_count": 1,
}
_training_container_spec = {"image_uri": TRAIN_IMAGE}

worker_pool_specs = [
    {
        "machine_spec": _training_machine_spec,
        "replica_count": 1,
        "container_spec": _training_container_spec,
    },
]

# Pipeline stages: custom training job -> import the saved model as an
# unmanaged container model -> upload it -> create an endpoint -> deploy.
# NOTE(review): the function name shadows the `pipeline` decorator imported from
# kfp.dsl; the compile cell refers to it by this name, so it is kept as is.
@pipeline(name="pytorch-tabular-gpu")
def pipeline(
    project_id: str,
    display_name: str,
):
    # Stage 1: run the training container using the worker pool defined above.
    training_job = custom_job.CustomTrainingJobOp(
        project=project_id,
        display_name=f"{display_name}-train",
        worker_pool_specs=worker_pool_specs,
    )

    # Stage 2: register the saved model location together with the serving
    # container that should host it. Ordered explicitly after training because
    # no artifact is passed between the two tasks.
    serving_container_spec = {
        "imageUri": PREDICTION_IMAGE,
        "healthRoute": "/health_check",
        "ports": [{"containerPort": 8080}],
    }
    unmanaged_model = importer(
        artifact_uri="gs://vtx-models/pytorch/tabular_random",
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={"containerSpec": serving_container_spec},
    ).after(training_job)

    # Stage 3: upload the model. Explanations are not attached yet; the
    # notebook's `parameters` and `EXPLANATION_METADATA` objects are the
    # candidates for explanation_parameters / explanation_metadata.
    uploaded_model = model.ModelUploadOp(
        project=project_id,
        display_name=f"{display_name}-model",
        unmanaged_container_model=unmanaged_model.outputs["artifact"],
    )

    # Stage 4: create the endpoint that will serve the model.
    created_endpoint = endpoint.EndpointCreateOp(
        project=project_id,
        display_name=f"{display_name}-endpoint",
    )

    # Stage 5: deploy the uploaded model on one dedicated A100 replica.
    endpoint.ModelDeployOp(
        model=uploaded_model.outputs["model"],
        endpoint=created_endpoint.outputs["endpoint"],
        deployed_model_display_name=f"{display_name}-endpoint-op",
        dedicated_resources_machine_type="a2-highgpu-1g",
        dedicated_resources_accelerator_type="NVIDIA_TESLA_A100",
        dedicated_resources_accelerator_count=1,
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1,
    )

In [41]:
# Compile the pipeline function into a JSON template file.
from kfp import compiler

pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(pipeline_func=pipeline, package_path='pytorch-gpu.json')

## Run Pipeline Job

In [42]:
from google.cloud import aiplatform

# Initialize the aiplatform package with the project and staging bucket from
# the configuration cell, so these settings are defined in exactly one place.
aiplatform.init(
    project=PROJECT_ID,
    location='us-central1',
    staging_bucket=STAGING_BUCKET)

In [43]:
# Create a run of the compiled pipeline template and submit it.
# project_id comes from the configuration cell instead of a repeated literal.
job = aiplatform.PipelineJob(
    display_name="pytorch-tabular-run",
    template_path="pytorch-gpu.json",
    parameter_values={
        "project_id": PROJECT_ID,
        "display_name": "pytorch-tab-pipe",
    },
)
job.submit()

Creating PipelineJob
PipelineJob created. Resource name: projects/569083142710/locations/us-central1/pipelineJobs/pytorch-tabular-gpu-20230328184734
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/569083142710/locations/us-central1/pipelineJobs/pytorch-tabular-gpu-20230328184734')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/pytorch-tabular-gpu-20230328184734?project=569083142710
