## Set Variables

In [11]:
# Google Cloud project that owns the images and runs the Vertex AI resources.
PROJECT_ID = 'jchavezar-demo'

# Container images (training / serving), both stored under the project's registry path.
TRAIN_IMAGE = f'gcr.io/{PROJECT_ID}/pytorch-custom-random-t:v2'
PREDICTION_IMAGE = f'gcr.io/{PROJECT_ID}/pytorch-custom-random-p:v2'

# Bucket passed to aiplatform.init() as the staging bucket.
STAGING_BUCKET = 'gs://vtx-staging'

# Training Block

In [3]:
## Start from an empty `training/` folder that will hold the training code files
!rm -rf training && mkdir training

## Create Training Code [PyTorch]

In [4]:
%%writefile training/train.py
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import random
import numpy as np
import pandas as pd
import shutil
import os

def make_mixed_classification(n_samples, n_features, n_categories, random_state=42):
    """Generate a synthetic classification frame with numeric and categorical columns.

    Features come from ``make_classification``; ``n_categories`` distinct columns
    are then replaced by their quartile codes (0-3) and named ``cat_col_<i>``,
    while every other column is named ``num_col_<i>``.

    Args:
        n_samples: Number of rows to generate.
        n_features: Number of feature columns to generate.
        n_categories: Number of distinct feature columns to turn into categories.
        random_state: Seed used for both the feature generation and the choice of
            categorical columns, so the column names are reproducible between
            runs. Pass ``None`` for a non-deterministic draw.

    Returns:
        A tuple ``(data, cat_col_names, num_col_names)`` where ``data`` is a
        DataFrame holding all feature columns plus a ``target`` column.

    Raises:
        ValueError: If ``n_categories`` is negative or larger than the number of
            feature columns (raised by ``random.Random.sample``).
    """
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        random_state=random_state,
        n_informative=5,
    )
    n_cols = X.shape[-1]

    # Sample WITHOUT replacement: random.choices could return the same index
    # several times and silently yield fewer than n_categories categorical columns.
    # A private Random instance keeps the global `random` state untouched.
    rng = random.Random(random_state)
    cat_cols = set(rng.sample(range(n_cols), n_categories))

    # Replace each selected column by its quartile bucket code.
    for col in sorted(cat_cols):
        X[:, col] = pd.qcut(X[:, col], q=4).codes.astype(int)

    col_names = []
    num_col_names = []
    cat_col_names = []
    for i in range(n_cols):
        if i in cat_cols:
            name = f"cat_col_{i}"
            cat_col_names.append(name)
        else:
            name = f"num_col_{i}"
            num_col_names.append(name)
        col_names.append(name)

    X = pd.DataFrame(X, columns=col_names)
    y = pd.Series(y, name="target")
    data = X.join(y)
    return data, cat_col_names, num_col_names

def print_metrics(y_true, y_pred, tag):
    """Print accuracy and F1 for a set of predictions, prefixed by ``tag``.

    Args:
        y_true: Ground-truth labels (pandas object or array exposing ``ndim``).
        y_pred: Predicted labels, same kinds of containers as ``y_true``.
        tag: Label placed in front of each metric name in the printed line.
    """
    def _flatten(values):
        # pandas containers are unwrapped to their underlying array first
        if isinstance(values, (pd.DataFrame, pd.Series)):
            values = values.values
        # anything with more than one dimension is collapsed to 1-D
        return values.ravel() if values.ndim > 1 else values

    truth = _flatten(y_true)
    predicted = _flatten(y_pred)
    val_acc = accuracy_score(truth, predicted)
    val_f1 = f1_score(truth, predicted)
    print(f"{tag} Acc: {val_acc} | {tag} F1: {val_f1}")


# Generate the synthetic dataset and split it twice:
# first into train/test, then the remaining train part into train/validation.
data, cat_col_names, num_col_names = make_mixed_classification(
    n_samples=10000,
    n_features=20,
    n_categories=4,
)
train, test = train_test_split(data, random_state=42)
train, val = train_test_split(train, random_state=42)

# Recreate the dataset folder from scratch and store the held-out test split in it.
dataset_dir = os.path.join('/gcs/vtx-datasets-public', 'synthetic_data')
if os.path.exists(dataset_dir):
    shutil.rmtree(dataset_dir)
os.mkdir(dataset_dir)
test.to_csv(f'{dataset_dir}/test.csv', index=False)

from pytorch_tabular import TabularModel
from pytorch_tabular.models import CategoryEmbeddingModelConfig
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig
from pytorch_tabular.models.common.heads import LinearHeadConfig

# --- Data: which columns are the target, the numeric and the categorical features
data_config = DataConfig(
    # target should always be a list. Multi-targets are only supported for
    # regression. Multi-Task Classification is not implemented
    target=['target'],
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names,
)

# --- Trainer: batch size, epoch budget and hardware selection
trainer_config = TrainerConfig(
    auto_lr_find=True,   # Runs the LRFinder to automatically derive a learning rate
    batch_size=1024,
    max_epochs=100,
    accelerator="auto",  # can be 'cpu','gpu', 'tpu', or 'ipu'
)

# --- Optimizer: library defaults
optimizer_config = OptimizerConfig()

# --- Head: no additional layer in the head, just a mapping layer to output_dim.
# The config object is turned into a plain dict before it is handed to the
# model config (OmegaConf doesn't accept objects).
linear_head = LinearHeadConfig(
    layers="",
    dropout=0.1,
    initialization="kaiming",
)
head_config = vars(linear_head)

# --- Model: category-embedding network for classification
model_config = CategoryEmbeddingModelConfig(
    task="classification",
    layers="32-16",           # Number of nodes in each layer
    activation="LeakyReLU",   # Activation between each layers
    dropout=0.1,
    initialization="kaiming",
    head="LinearHead",        # Linear Head
    head_config=head_config,  # Linear Head Config
    learning_rate=1e-3,
)

# --- Assemble, train on the train/validation splits, and save the result
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
tabular_model.fit(train=train, validation=val)
tabular_model.save_model('/gcs/vtx-models/pytorch/tabular_random')

Writing source/train.py


### Build Image and Push to GCR

In [5]:
%%writefile training/Dockerfile
# Training image: PyTorch base image plus pytorch_tabular; runs train.py on start.
FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel

# Install dependencies BEFORE copying the code, so editing train.py does not
# invalidate this slow layer. The requirement is quoted so the shell cannot
# treat "[extra]" as a glob pattern, and --no-cache-dir keeps pip's download
# cache out of the image.
RUN pip install --no-cache-dir "pytorch_tabular[extra]"

# Copy the build context (the training/ folder) into the image's working directory.
COPY . .

ENTRYPOINT ["python", "train.py"]

Writing source/Dockerfile


In [None]:
# Build the training/ folder with Cloud Build and tag the result as TRAIN_IMAGE
!gcloud builds submit --tag {TRAIN_IMAGE} training/.

## Run Training CustomJob using Container Image

In [10]:
from google.cloud import aiplatform

# Bind the SDK to the project and staging bucket defined in the variables cell.
aiplatform.init(project=PROJECT_ID, staging_bucket=STAGING_BUCKET)

# Hardware for the single worker: n1-standard-4 with one T4 accelerator.
machine_spec = {
    "machine_type": "n1-standard-4",
    "accelerator_type": "NVIDIA_TESLA_T4",
    "accelerator_count": 1,
}

# The training image is run as built: no command or argument overrides.
container_spec = {
    "image_uri": TRAIN_IMAGE,
    "command": [],
    "args": [],
}

# One worker pool with one replica.
worker_pool_specs = [
    {
        "machine_spec": machine_spec,
        "replica_count": 1,
        "container_spec": container_spec,
    }
]

my_job = aiplatform.CustomJob(
    display_name='pytorch_tabular_custom',
    worker_pool_specs=worker_pool_specs,
)

# Submit the job; the captured cell output shows the state being polled.
my_job.run()

Creating CustomJob
CustomJob created. Resource name: projects/569083142710/locations/us-central1/customJobs/376802222521974784
To use this CustomJob in another session:
custom_job = aiplatform.CustomJob.get('projects/569083142710/locations/us-central1/customJobs/376802222521974784')
View Custom Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/376802222521974784?project=569083142710
CustomJob projects/569083142710/locations/us-central1/customJobs/376802222521974784 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/569083142710/locations/us-central1/customJobs/376802222521974784 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/569083142710/locations/us-central1/customJobs/376802222521974784 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/569083142710/locations/us-central1/customJobs/376802222521974784 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/569083142710/locations/us-central1/customJobs/37680222252197

# Prediction Block

In [56]:
## Recreate the prediction/app/tabular_random folder tree from scratch
!rm -rf prediction
!mkdir -p prediction/app/tabular_random

## Create Prediction Code [Uvicorn:FastAPI PyTorch]

In [57]:
%%writefile prediction/app/main.py
import json
import os
import pandas as pd
from fastapi import Request, FastAPI
from pytorch_tabular import TabularModel

# FastAPI application object; the routes below are registered on it.
app = FastAPI()

# Load the trained model once, at import time, from the "tabular_random"
# folder (a path relative to the process working directory).
loaded_model = TabularModel.load_from_checkpoint("tabular_random")


@app.get('/health_check')
def health():
    """Health endpoint: replies with the value 200."""
    return 200


# Prediction route: the AIP_PREDICT_ROUTE environment variable when it is set,
# otherwise '/predict'.
method = os.environ.get('AIP_PREDICT_ROUTE', '/predict')

# Column order used for list-style instances. NOTE(review): this list has to
# match the column names produced by training/train.py - confirm after retraining.
FEATURE_COLUMNS = [
    'num_col_0', 'cat_col_1', 'num_col_2', 'num_col_3', 'cat_col_4',
    'num_col_5', 'cat_col_6', 'num_col_7', 'num_col_8', 'cat_col_9', 'num_col_10',
    'num_col_11', 'num_col_12', 'num_col_13', 'num_col_14', 'num_col_15', 'num_col_16',
    'num_col_17', 'num_col_18', 'num_col_19',
]


def _instances_to_frame(instances):
    """Convert the request's "instances" payload into a DataFrame of feature rows.

    Accepted shapes:
      * a flat list of feature values -> one row (the shape originally supported);
      * a list of lists               -> one row per inner list;
      * a list of dicts               -> one row per dict, keyed by column name.

    Raises:
        HTTPException: status 400 when the payload is not a non-empty list or
            its rows do not fit FEATURE_COLUMNS.
    """
    # Local import: the module header only imports Request and FastAPI.
    from fastapi import HTTPException

    if not isinstance(instances, list) or len(instances) == 0:
        raise HTTPException(status_code=400, detail='"instances" must be a non-empty list')

    # Wrapping is only right for a single flat instance; wrapping a batch would
    # produce one row whose cells are whole instances.
    if isinstance(instances[0], (list, dict)):
        rows = instances
    else:
        rows = [instances]

    try:
        return pd.DataFrame(rows, columns=FEATURE_COLUMNS)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=f"malformed instances: {err}") from err


@app.post(method)
async def predict(request: Request):
    """Run the loaded model on the JSON body's "instances" and return its predictions.

    Returns:
        A dict ``{"predictions": [...]}`` with one entry per input row, taken
        from the ``prediction`` column of the model output.
    """
    print("----------------- PREDICTING -----------------")
    body = await request.json()
    # A missing key is reported as a 400 by the helper instead of a KeyError/500.
    instances = body.get("instances") if isinstance(body, dict) else None
    data_pred = _instances_to_frame(instances)
    outputs = loaded_model.predict(data_pred)
    response = outputs['prediction'].tolist()
    print("----------------- OUTPUTS -----------------")
    return {"predictions": response}

Writing prediction/app/main.py


### Write the Prediction Dockerfile

In [61]:
%%writefile prediction/Dockerfile
# Serving image: PyTorch base image plus pytorch_tabular, FastAPI and uvicorn.
FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel

WORKDIR /app

# Install dependencies BEFORE copying the app, so changing main.py or the model
# files does not invalidate this slow layer. One pip call resolves all packages
# together; the quoted requirement keeps the shell from globbing "[extra]", and
# --no-cache-dir keeps pip's download cache out of the image.
RUN pip install --no-cache-dir "pytorch_tabular[extra]" uvicorn fastapi

# Application code and the model folder loaded by main.py.
COPY app /app

# Port uvicorn listens on (same port as in the CMD below).
EXPOSE 8080

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

Overwriting prediction/Dockerfile


## Copy Model from GCS

In [62]:
# Copy the model files from the bucket into the folder bundled with the serving app.
# The wildcard URL is quoted so it reaches gsutil untouched by the local shell.
!gsutil cp -r 'gs://vtx-models/pytorch/tabular_random/*.*' prediction/app/tabular_random/.

Copying gs://vtx-models/pytorch/tabular_random/callbacks.sav...
Copying gs://vtx-models/pytorch/tabular_random/config.yml...                    
Copying gs://vtx-models/pytorch/tabular_random/custom_params.sav...             
Copying gs://vtx-models/pytorch/tabular_random/datamodule.sav...                
/ [4 files][  2.2 MiB/  2.2 MiB]                                                
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying gs://vtx-models/pytorch/tabular_random/model.ckpt...
/ [5 files][  2.2 MiB/  2.2 MiB]                                                
Operation completed over 5 objects/2.2 MiB.                                      


## Create Container Image and Push it

In [63]:
# Build the prediction/ folder with Cloud Build and tag the result as PREDICTION_IMAGE
!gcloud builds submit --tag {PREDICTION_IMAGE} prediction/.

Creating temporary tarball archive of 7 file(s) totalling 2.2 MiB before compression.
Uploading tarball of [prediction/.] to [gs://jchavezar-demo_cloudbuild/source/1679369929.449695-72898d9129914afaaa3c9fc037f16c5f.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/jchavezar-demo/locations/global/builds/1e68b91c-142e-4d86-bcca-39fc37fc49e1].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/1e68b91c-142e-4d86-bcca-39fc37fc49e1?project=569083142710 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "1e68b91c-142e-4d86-bcca-39fc37fc49e1"

FETCHSOURCE
Fetching storage object: gs://jchavezar-demo_cloudbuild/source/1679369929.449695-72898d9129914afaaa3c9fc037f16c5f.tgz#1679369930444749
Copying gs://jchavezar-demo_cloudbuild/source/1679369929.449695-72898d9129914afaaa3c9fc037f16c5f.tgz#1679369930444749...
/ [1 files][  1.4 MiB/  1.4 MiB]                                                
Operation completed over 

## Upload to Model Registry

In [None]:
# Settings for registering the serving container as a model: the health route
# and the port match what prediction/app/main.py and its Dockerfile expose.
upload_options = dict(
    display_name="synthetic_data_pytorch",
    serving_container_image_uri=PREDICTION_IMAGE,
    serving_container_health_route="/health_check",
    serving_container_ports=[8080],
)
model = aiplatform.Model.upload(**upload_options)

Creating Model
Create Model backing LRO: projects/569083142710/locations/us-central1/models/7943630662076989440/operations/6286877539092660224


In [65]:
# Deployment settings: exactly one replica (min == max) on an
# a2-highgpu-1g machine with a single A100 accelerator.
deploy_options = dict(
    deployed_model_display_name='synthetic_data_pytorch',
    machine_type='a2-highgpu-1g',
    accelerator_type='NVIDIA_TESLA_A100',
    accelerator_count=1,
    min_replica_count=1,
    max_replica_count=1,
)
endpoint = model.deploy(**deploy_options)

Creating Endpoint
Create Endpoint backing LRO: projects/569083142710/locations/us-central1/endpoints/9078777461709209600/operations/6746244701084450816
Endpoint created. Resource name: projects/569083142710/locations/us-central1/endpoints/9078777461709209600
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/569083142710/locations/us-central1/endpoints/9078777461709209600')
Deploying model to Endpoint : projects/569083142710/locations/us-central1/endpoints/9078777461709209600
Deploy Endpoint model backing LRO: projects/569083142710/locations/us-central1/endpoints/9078777461709209600/operations/6575107915244371968
Endpoint model deployed. Resource name: projects/569083142710/locations/us-central1/endpoints/9078777461709209600
