### Configure Environment, and Build 'Custom Training Container'
To run custom model training with RAPIDS on GCP's AI Platform, we first need to build a container that encapsulates the required models and code.

In [None]:
## GCLOUD_BIN_PATH=[path to the location where 'gcloud' bin is installed]
## See: https://cloud.google.com/sdk/install
import json
import os
import subprocess

GCLOUD_BIN_PATH = "[/path/to/gcloud/location]"
GCP_PROJECT_NAME = "[YOUR PROJECT NAME]"
# Bucket path WITHOUT the scheme: the training configuration built later in
# this notebook prepends "gs://" itself, so including it here would yield
# "gs://gs://...".
GCP_STORAGE_PATH = "[PATH TO GCP STORAGE LOCATION]"  # Ex. [path_to_your_data]/subdir


# Environment handed to every child process: a copy of the kernel's
# environment with the gcloud install directory appended to PATH.
gcloud_env = os.environ.copy()
# os.pathsep keeps this portable (":" on POSIX, ";" on Windows); .get() plus
# the filter avoids a KeyError / stray empty PATH entry when PATH is unset.
gcloud_env["PATH"] = os.pathsep.join(
    entry for entry in (gcloud_env.get("PATH", ""), GCLOUD_BIN_PATH) if entry
)
def exec_cmd_and_return(*popenargs, **kwargs):
    """Run a command to completion and return its exit code and captured text.

    Positional and keyword arguments are forwarded to ``subprocess.Popen``.
    stdout and stderr are always captured through pipes.

    Args:
        *popenargs: Positional arguments for ``subprocess.Popen``; typically a
            single list of command tokens.
        **kwargs: Extra ``subprocess.Popen`` keyword arguments. ``env`` may be
            supplied to override the process environment; when it is omitted
            (or ``None``) the module-level ``gcloud_env`` is used.

    Returns:
        tuple: ``(return_code, text)`` where ``text`` is the decoded stdout
        followed by the decoded stderr.
    """
    env = kwargs.pop("env", None)
    if env is None:
        env = gcloud_env

    process = subprocess.Popen(*popenargs, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, env=env, **kwargs)
    output, err = process.communicate()

    # Decode both streams. Appending str(err) on raw bytes would embed a
    # "b'...'" repr in the result; errors="replace" keeps undecodable tool
    # output from raising.
    output_text = output.decode(errors="replace")
    err_text = err.decode(errors="replace")

    return (process.returncode, output_text + err_text)

# Build the training image locally, then tag it with the project's GCR name.
# Each command's captured output is printed as soon as the command finishes.
for command in (
    "docker build -t gcp_training_test --file Dockerfile.training ./".split(),
    f"docker tag gcp_training_test:latest gcr.io/{GCP_PROJECT_NAME}/gcp_rapids_training:latest".split(),
):
    _, result = exec_cmd_and_return(command)
    print(result)

### Push 'Custom Training Container'
Once our container has successfully been built, we push it to the Google Container Registry (GCR).

In [None]:
# Register gcloud as a Docker credential helper BEFORE pushing; otherwise the
# push to gcr.io is attempted without registry credentials configured.
# --quiet suppresses gcloud's interactive confirmation prompt, which cannot be
# answered from a captured subprocess.
command = "gcloud auth configure-docker --quiet".split()
_, result = exec_cmd_and_return(command)
print(result)

# Push the tagged training image to the project's Google Container Registry.
command = f"docker push gcr.io/{GCP_PROJECT_NAME}/gcp_rapids_training:latest".split()
_, result = exec_cmd_and_return(command)
print(result)

### Create Training Script
Starting a custom training job on GCP requires a configuration that describes what to train and how hyper-parameter optimization (HPO) should run.

In [None]:
config_name = "gcloud_training_config.json"

# Accept GCP_STORAGE_PATH with or without a leading "gs://" so the URIs below
# never come out as "gs://gs://...".
storage_path = GCP_STORAGE_PATH.strip()
if storage_path.startswith("gs://"):
    storage_path = storage_path[len("gs://"):]
storage_uri = f"gs://{storage_path}"
# Used both as the container's output path and as the job directory.
training_output_uri = f"{storage_uri}/training_output"

config = {
    "trainingInput": {
        # Command-line arguments passed to the training container.
        "args": [
            "--train",
            "--do-hpo",
            "--hpo-num-bins=64",
            "--cloud-type=GCP",
            "--compute-type=GPU",
            f"--data-input-path={storage_uri}",
            f"--data-output-path={training_output_uri}",
            "--data-name=airline_20000000.orc",
            "--model-type=RandomForest"
        ],
        # Hyper-parameter search: maximize the "hpo_accuracy" metric over the
        # parameter ranges listed in "params".
        "hyperparameters": {
            "enableTrialEarlyStopping": True,
            "goal": "MAXIMIZE",
            "hyperparameterMetricTag": "hpo_accuracy",
            "maxParallelTrials": 1,
            "maxTrials": 1,
            "maxFailedTrials": 1,
            "params": [
                {
                    "maxValue": 600,
                    "minValue": 100,
                    "parameterName": "hpo-num-est",
                    "type": "INTEGER"
                },
                {
                    "maxValue": 20,
                    "minValue": 9,
                    "parameterName": "hpo-max-depth",
                    "type": "INTEGER"
                },
                {
                    "maxValue": 0.6,
                    "minValue": 0.2,
                    "parameterName": "hpo-max-features",
                    "type": "DOUBLE"
                }
            ]
        },
        "jobDir": training_output_uri,
        # The AI Platform TrainingInput schema names these fields
        # "masterConfig" / "masterType"; "mainConfig" / "mainType" are not
        # part of the API.
        "masterConfig": {
            "imageUri": f"gcr.io/{GCP_PROJECT_NAME}/gcp_rapids_training:latest",
            "acceleratorConfig": {
                "count": "1",
                "type": "NVIDIA_TESLA_T4"
            }
        },
        "masterType": "n1-standard-8",
        "region": "us-west1",
        "scaleTier": "CUSTOM"
    }
}

# Persist the configuration so it can be passed to gcloud via --config.
with open(config_name, 'w') as writer:
    json.dump(config, writer, indent=4, sort_keys=True)


In [None]:
# Submit the training job described by the JSON config written above.
experiment_name = "test_experiment_01"
command = f"gcloud ai-platform jobs submit training {experiment_name} --config ./{config_name}".split()

# Bind the exit status to a real name: "_" is IPython's last-output variable
# and should not carry a value that is read back.
return_code, result = exec_cmd_and_return(command)

print(return_code)
print(result)
