# Initial Setup (Variables and Libraries)

![](../../../../images/custom-job-pytorch-1.png)

In [1]:
# Google Cloud project used for the container images and for aiplatform.init().
PROJECT_ID = 'jchavezar-demo'

# Container images for the training job and the prediction server; both are
# stored in the project's gcr.io registry.
TRAIN_IMAGE = f'gcr.io/{PROJECT_ID}/pytorch-custom-random-t:v2'
PREDICTION_IMAGE = f'gcr.io/{PROJECT_ID}/pytorch-custom-random-p:v2'

# Bucket handed to aiplatform.init() as the staging bucket.
STAGING_BUCKET = 'gs://vtx-staging'

In [2]:
from google.cloud import aiplatform

# Training Block

![](../../../../images/custom-job-pytorch-2.png)

In [3]:
## Create Folder Code Files Structure
!rm -fr training
!mkdir training

## Create Training Code [PyTorch]

In [4]:
%%writefile training/train.py
#%%
import pandas as pd
from pytorch_tabular import TabularModel
from pytorch_tabular.models import CategoryEmbeddingModelConfig
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig
from pytorch_tabular.models.common.heads import LinearHeadConfig

train = pd.read_csv('gs://vtx-datasets-public/pytorch_tabular/synthetic/train.csv')
test = pd.read_csv('gs://vtx-datasets-public/pytorch_tabular/synthetic/test.csv')
val = pd.read_csv('gs://vtx-datasets-public/pytorch_tabular/synthetic/val.csv')

cat_col_names = [col for col in train.columns if 'cat' in col]
num_col_names = [col for col in train.columns if 'num' in col]

data_config = DataConfig(
    target=['target'], #target should always be a list. Multi-targets are only supported for regression. Multi-Task Classification is not implemented
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names,
)
trainer_config = TrainerConfig(
    auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate
    batch_size=1024,
    max_epochs=100,
    accelerator="auto", # can be 'cpu','gpu', 'tpu', or 'ipu' 
)
optimizer_config = OptimizerConfig()


head_config = LinearHeadConfig(
    layers="", # No additional layer in head, just a mapping layer to output_dim
    dropout=0.1,
    initialization="kaiming"
).__dict__ # Convert to dict to pass to the model config (OmegaConf doesn't accept objects)

model_config = CategoryEmbeddingModelConfig(
    task="classification",
    layers="32-16", # Number of nodes in each layer
    activation="LeakyReLU", # Activation between each layers
    dropout=0.1,
    initialization="kaiming",
    head = "LinearHead", #Linear Head
    head_config = head_config, # Linear Head Config
    learning_rate = 1e-3
)

tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)

tabular_model.fit(train=train, validation=val)
tabular_model.save_model('/gcs/vtx-models/pytorch/tabular_random')

Writing training/train.py


### Build Image and Push to GCR

In [5]:
%%writefile training/Dockerfile
FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel

# Install dependencies before copying the code so that editing train.py does not
# invalidate the slow dependency layers. gcsfs is what lets pandas read gs:// URLs.
# TODO: pin package versions for reproducible builds.
RUN pip install --no-cache-dir "pytorch_tabular[extra]"
RUN pip install --no-cache-dir gcsfs

# Copy the build context (train.py) into the image's working directory.
COPY . .

ENTRYPOINT ["python", "train.py"]

Writing training/Dockerfile


In [6]:
!gcloud builds submit -t $TRAIN_IMAGE training/.

Creating temporary tarball archive of 2 file(s) totalling 2.2 KiB before compression.
Uploading tarball of [training/.] to [gs://jchavezar-demo_cloudbuild/source/1680015837.998502-9e36983ee02742669b5c1f03903fe6ad.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/jchavezar-demo/locations/global/builds/5efed67e-58c4-4016-bcca-115291b0b930].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/5efed67e-58c4-4016-bcca-115291b0b930?project=569083142710 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "5efed67e-58c4-4016-bcca-115291b0b930"

FETCHSOURCE
Fetching storage object: gs://jchavezar-demo_cloudbuild/source/1680015837.998502-9e36983ee02742669b5c1f03903fe6ad.tgz#1680015838211406
Copying gs://jchavezar-demo_cloudbuild/source/1680015837.998502-9e36983ee02742669b5c1f03903fe6ad.tgz#1680015838211406...
/ [1 files][  1.2 KiB/  1.2 KiB]                                                
Operation completed over 1 

## Run Training CustomJob using Container Image

In [7]:
aiplatform.init(project=PROJECT_ID, staging_bucket=STAGING_BUCKET)

# Hardware for each training replica: an n1-standard-4 VM with one T4 GPU.
machine_spec = {
    "machine_type": "n1-standard-4",
    "accelerator_type": "NVIDIA_TESLA_T4",
    "accelerator_count": 1,
}

# Training container. command/args are left empty, so nothing overrides what the
# image itself defines.
container_spec = {
    "image_uri": TRAIN_IMAGE,
    "command": [],
    "args": [],
}

# A single worker pool with one replica.
worker_pool_specs = [
    {
        "machine_spec": machine_spec,
        "replica_count": 1,
        "container_spec": container_spec,
    }
]

my_job = aiplatform.CustomJob(
    display_name='pytorch_tabular_custom',
    worker_pool_specs=worker_pool_specs,
)

# Submit the job; the job state is polled and logged below while it runs.
my_job.run()

Creating CustomJob
CustomJob created. Resource name: projects/569083142710/locations/us-central1/customJobs/3225139870833836032
To use this CustomJob in another session:
custom_job = aiplatform.CustomJob.get('projects/569083142710/locations/us-central1/customJobs/3225139870833836032')
View Custom Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/3225139870833836032?project=569083142710
CustomJob projects/569083142710/locations/us-central1/customJobs/3225139870833836032 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/569083142710/locations/us-central1/customJobs/3225139870833836032 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/569083142710/locations/us-central1/customJobs/3225139870833836032 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/569083142710/locations/us-central1/customJobs/3225139870833836032 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/569083142710/locations/us-central1/customJobs/3225139

# Prediction Block

![](../../../../images/custom-job-pytorch-3.png)

In [8]:
## Create Folder Code Files Structure
!rm -fr prediction
!mkdir prediction
!mkdir prediction/app
!mkdir prediction/app/tabular_random

## Create Prediction Code [Uvicorn:FastAPI PyTorch]

In [9]:
%%writefile prediction/app/main.py
#%%
import json
import os
import pandas as pd
from fastapi import Request, FastAPI
from pytorch_tabular import TabularModel
from starlette.responses import JSONResponse

app = FastAPI()
#columns = pd.read_csv('gs://vtx-datasets-public/pytorch_tabular/synthetic/train.csv', nrows=0).iloc[:,:-1].columns.to_list()
loaded_model = TabularModel.load_from_checkpoint("tabular_random")
#%%
@app.get('/health_check')
def health():
    return 200
if os.environ.get('AIP_PREDICT_ROUTE') is not None:
    method = os.environ['AIP_PREDICT_ROUTE']
else:
    method = '/predict'

@app.post(method)
async def predict(request: Request):
    print("----------------- PREDICTING -----------------")
    body = await request.json()
    instances = body["instances"]
    #data_pred = pd.DataFrame.from_dict(instances)
    #print(data_pred)
    #outputs = loaded_model.predict(data_pred)
    #response = outputs['prediction'].tolist()[0]
    output = []
    for i in instances:
        output.append(float(loaded_model.predict(pd.DataFrame.from_dict(i))["prediction"][0]))
    print(output)
    print("----------------- OUTPUTS -----------------")
    #return {
    #    "predictions": [{"probability": response}]
    #    }
    return JSONResponse({"predictions": output})

Writing prediction/app/main.py


### Write the Prediction Dockerfile

In [10]:
%%writefile prediction/Dockerfile
FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel

# Install dependencies before copying the app: the app directory also holds the
# model files, so copying it first would rebuild these layers after every retrain.
# TODO: pin package versions for reproducible builds.
RUN pip install --no-cache-dir "pytorch_tabular[extra]"
RUN pip install --no-cache-dir uvicorn fastapi
RUN pip install --no-cache-dir gcsfs

WORKDIR /app
COPY app /app

# Port uvicorn listens on; the same value is passed as serving_container_ports
# when the model is uploaded.
EXPOSE 8080

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

Writing prediction/Dockerfile


## Copy Model from GCS

In [11]:
!gsutil cp -r gs://vtx-models/pytorch/tabular_random prediction/app/

Copying gs://vtx-models/pytorch/tabular_random/callbacks.sav...
Copying gs://vtx-models/pytorch/tabular_random/config.yml...                    
Copying gs://vtx-models/pytorch/tabular_random/custom_params.sav...             
/ [3 files][888.7 KiB/888.7 KiB]                                                
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying gs://vtx-models/pytorch/tabular_random/datamodule.sav...
Copying gs://vtx-models/pytorch/tabular_random/model.ckpt...                    
/ [5 files][  2.1 MiB/  2.1 MiB]                                                
Operation completed over 5 objects/2.1 MiB.                                      


## Build the Prediction Container Image and Push It to GCR

In [12]:
!gcloud builds submit -t $PREDICTION_IMAGE prediction/.

Creating temporary tarball archive of 7 file(s) totalling 2.1 MiB before compression.
Uploading tarball of [prediction/.] to [gs://jchavezar-demo_cloudbuild/source/1680016763.966658-e020b9d8324140ae9ec36ce9d87af583.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/jchavezar-demo/locations/global/builds/5a625c8a-1055-439e-8536-533f1015b97b].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/5a625c8a-1055-439e-8536-533f1015b97b?project=569083142710 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "5a625c8a-1055-439e-8536-533f1015b97b"

FETCHSOURCE
Fetching storage object: gs://jchavezar-demo_cloudbuild/source/1680016763.966658-e020b9d8324140ae9ec36ce9d87af583.tgz#1680016764795567
Copying gs://jchavezar-demo_cloudbuild/source/1680016763.966658-e020b9d8324140ae9ec36ce9d87af583.tgz#1680016764795567...
/ [1 files][  1.4 MiB/  1.4 MiB]                                                
Operation completed over 

## Upload to Model Registry

In [13]:
from google.cloud import aiplatform

# Sampled Shapley attribution with path_count = 10.
PARAMETERS = {"sampled_shapley_attribution": {"path_count": 10}}
parameters = aiplatform.explain.ExplanationParameters(PARAMETERS)

# The model has 20 input columns named "<kind>_col_<index>": indices 2, 3, 4 and
# 18 are categorical ("cat"), every other index in 0..19 is numeric ("num").
CATEGORICAL_COLUMN_INDICES = {2, 3, 4, 18}
FEATURE_NAMES = [
    f"{'cat' if index in CATEGORICAL_COLUMN_INDICES else 'num'}_col_{index}"
    for index in range(20)
]

# Every input (and the single "probability" output) uses an empty metadata entry.
EXPLANATION_METADATA = aiplatform.explain.ExplanationMetadata(
    inputs={name: {} for name in FEATURE_NAMES},
    outputs={"probability": {}},
)

In [14]:
# Register the serving container as a model, together with the explanation
# parameters and metadata.
model = aiplatform.Model.upload(
    display_name="synthetic_data_pytorch_2",
    # Serving container: image, listening port and health-check path.
    serving_container_image_uri=PREDICTION_IMAGE,
    serving_container_ports=[8080],
    serving_container_health_route="/health_check",
    # Explanation configuration.
    explanation_metadata=EXPLANATION_METADATA,
    explanation_parameters=parameters,
)

Creating Model
Create Model backing LRO: projects/569083142710/locations/us-central1/models/9208719944903032832/operations/5164350933924052992
Model created. Resource name: projects/569083142710/locations/us-central1/models/9208719944903032832
To use this Model in another session:
model = aiplatform.Model('projects/569083142710/locations/us-central1/models/9208719944903032832')


In [15]:
# Hardware for each serving replica: an a2-highgpu-1g VM with one A100 GPU.
replica_hardware = {
    "machine_type": 'a2-highgpu-1g',
    "accelerator_type": 'NVIDIA_TESLA_A100',
    "accelerator_count": 1,
}

# No endpoint is passed, so deploy() creates one (see the log below). The
# minimum and maximum replica counts are both 1.
endpoint = model.deploy(
    deployed_model_display_name='synthetic_data_pytorch_2',
    min_replica_count=1,
    max_replica_count=1,
    **replica_hardware,
)

Creating Endpoint
Create Endpoint backing LRO: projects/569083142710/locations/us-central1/endpoints/6155519091080691712/operations/103430852666458112
Endpoint created. Resource name: projects/569083142710/locations/us-central1/endpoints/6155519091080691712
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/569083142710/locations/us-central1/endpoints/6155519091080691712')
Deploying model to Endpoint : projects/569083142710/locations/us-central1/endpoints/6155519091080691712
Deploy Endpoint model backing LRO: projects/569083142710/locations/us-central1/endpoints/6155519091080691712/operations/2812346028529811456
Endpoint model deployed. Resource name: projects/569083142710/locations/us-central1/endpoints/6155519091080691712


In [16]:
%%writefile request.json
{
  "instances": [
    {
      "num_col_0": [-0.0166430796105736],
      "num_col_1": [-1.1673784944304106],
      "cat_col_2": [0.0],
      "cat_col_3": [0.0],
      "cat_col_4": [0.0],
      "num_col_5": [0.4192379163764471],
      "num_col_6": [-0.1479446488124239],
      "num_col_7": [-0.5070350236284159],
      "num_col_8": [-0.990552326644716],
      "num_col_9": [-3.995674031686334],
      "num_col_10": [-0.4228913875933527],
      "num_col_11": [0.1100728099704197],
      "num_col_12": [0.7011051904970438],
      "num_col_13": [-0.918551672105058],
      "num_col_14": [0.5458227322952036],
      "num_col_15": [-0.3471353451188136],
      "num_col_16": [0.559459501681792],
      "num_col_17": [1.1459262604154248],
      "cat_col_18": [3.0],
      "num_col_19": [1.0826087208281656]
    }
  ]
}

Writing request.json


## Prediction

In [17]:
!curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://us-central1-aiplatform.googleapis.com/v1/projects/jchavezar-demo/locations/us-central1/endpoints/4920969843227754496:predict -d "@request.json"

{
  "predictions": [
    1
  ],
  "deployedModelId": "3893082601908338688",
  "model": "projects/569083142710/locations/us-central1/models/8181899229862559744",
  "modelDisplayName": "synthetic_data_pytorch_2",
  "modelVersionId": "1"
}


## Explanation (Feature Attributions)

In [18]:
!curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" https://us-central1-aiplatform.googleapis.com/v1/projects/jchavezar-demo/locations/us-central1/endpoints/4920969843227754496:explain -d "@request.json"

{
  "explanations": [
    {
      "attributions": [
        {
          "instanceOutputValue": 1,
          "featureAttributions": {
            "num_col_15": [
              0
            ],
            "num_col_0": [
              0
            ],
            "cat_col_2": [
              0
            ],
            "num_col_10": [
              0
            ],
            "num_col_6": [
              0
            ],
            "cat_col_4": [
              0
            ],
            "num_col_8": [
              0
            ],
            "num_col_19": [
              0
            ],
            "cat_col_18": [
              0
            ],
            "num_col_5": [
              0.2
            ],
            "num_col_14": [
              0
            ],
            "num_col_7": [
              0
            ],
            "num_col_17": [
              0
            ],
            "num_col_16": [
              0
            ],
            "cat_col_3": [
              0
   