In [22]:
!pip install google-cloud-aiplatform
!pip install google-cloud-storage



In [30]:
from google.cloud import aiplatform
from google.cloud import storage
import datetime
import os

In [42]:
# Project and container image used for the training job.
PROJECT_ID = "YOUR-PROJECT"
CONTAINER_URI = "gcr.io/{}/demo-kedro-spaceflights".format(PROJECT_ID)

# Cloud Storage bucket holding the raw data and the Vertex staging area.
BUCKET_NAME = "demo-kedro-spaceflights"
BUCKET_URI = "gs://" + BUCKET_NAME

# Machine shape for the job, and the region the bucket is created in.
MACHINE_TYPE = "n1-standard-4"
REPLICA_COUNT = 1
REGION = "us-central1"

## Create GCS bucket

In [43]:
# Instantiate the storage client against PROJECT_ID explicitly, so bucket
# listing and creation target the same project as the Vertex AI job instead
# of whatever default project the environment happens to provide.
storage_client = storage.Client(project=PROJECT_ID)

In [44]:
# Collect the name of every bucket returned by the storage client and
# display the resulting list as the cell output.
buckets = storage_client.list_buckets()
bucket_names = list(map(lambda b: b.name, buckets))
bucket_names

['artifacts.lively-crane-384007.appspot.com',
 'demo-kedro-iris',
 'demo-kedro-vertexai-spaceflights',
 'gcf-sources-902191593306-us-west1',
 'telco-data-mocker',
 'telco-datamocker',
 'us.artifacts.lively-crane-384007.appspot.com']

In [45]:
if not BUCKET_NAME in bucket_names:
    bucket = storage_client.bucket(BUCKET_NAME)
    new_bucket = storage_client.create_bucket(bucket, location=REGION)
    print("Created bucket {} in {} with storage class {}"
          .format(
              new_bucket.name, new_bucket.location, new_bucket.storage_class
        ))
    # Upload the spaceflight data to Bucket
    os.environ['BUCKET_URI'] = BUCKET_URI
    !gsutil -m cp -r ../data/01_raw $BUCKET_URI/data/01_raw

Created bucket demo-kedro-spaceflights in US-CENTRAL1 with storage class STANDARD
Copying file://../data/01_raw/shuttles.csv [Content-Type=text/csv]...
Copying file://../data/01_raw/reviews.csv [Content-Type=text/csv]...
Copying file://../data/01_raw/.gitkeep [Content-Type=application/octet-stream]...
Copying file://../data/01_raw/companies.csv [Content-Type=text/csv]...          
Copying file://../data/01_raw/.ipynb_checkpoints/shuttles-checkpoint.csv [Content-Type=text/csv]...
Copying file://../data/01_raw/.ipynb_checkpoints/reviews-checkpoint.csv [Content-Type=text/csv]...
Copying file://../data/01_raw/.ipynb_checkpoints/companies-checkpoint.csv [Content-Type=text/csv]...
\ [7/7 files][ 22.5 MiB/ 22.5 MiB] 100% Done                                    
Operation completed over 7 objects/22.5 MiB.                                     


## Customize Kedro project for GCP

The configuration for running Kedro on GCP is customized in `GCP-Kedro-spaceflight/conf/gcp`:

- `catalog.yaml`: Change the paths of data to GCP storage services (GCS, BigQuery)
- `spark.yaml`: Change the spark config to enable I/O with `gcs.GoogleHadoopFileSystem`


Switch to running on GCP by specifying the environment in the Kedro CLI:
```
kedro run --env=gcp
```

## Containerize Kedro project

Build the Docker image:

```
docker build \
 -t gcr.io/$PROJECT_ID/$GCR_REPO_NAME:latest \
 .
```

Test locally (or on Vertex workbench):

```
docker run gcr.io/$PROJECT_ID/$GCR_REPO_NAME:latest
```

Push the image to Container/Artifact registry:

```
docker push gcr.io/$PROJECT_ID/$GCR_REPO_NAME:latest
```

## Run a Kedro pipeline as Vertex job

In [48]:
# Initialise the Vertex AI SDK. The location is pinned to REGION so that the
# job runs in the same region the bucket is created in; without it the SDK
# falls back to its own default location and ignores REGION.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=f'{BUCKET_URI}/vertex_staging',
)

In [49]:
# Define the custom-container training job. The display name carries a
# timestamp suffix (second resolution) taken at cell execution time.
JOB_NAME = 'test_kedro_spaceflights'
run_timestamp = f"{datetime.datetime.now():%Y%m%d-%H%M%S}"

job = aiplatform.CustomContainerTrainingJob(
    container_uri=CONTAINER_URI,
    display_name="_".join((JOB_NAME, run_timestamp)),
)

print(job)

<google.cloud.aiplatform.training_jobs.CustomContainerTrainingJob object at 0x7fe0a0e4ea90>


In [50]:
# Submit the training job using the machine type and replica count from the
# configuration cell.
# NOTE(review): `model` may be None if the job does not produce a Vertex AI
# Model resource — confirm before using it downstream.
model = job.run(
    machine_type=MACHINE_TYPE,
    replica_count=REPLICA_COUNT,
)

Training Output directory:
gs://demo-kedro-spaceflights/vertex_staging/aiplatform-custom-training-2023-04-25-10:26:43.927 
View Training:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/1909360628066156544?project=902191593306
View backing custom job:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/5434342130157879296?project=902191593306
CustomContainerTrainingJob projects/902191593306/locations/us-central1/trainingPipelines/1909360628066156544 current state:
PipelineState.PIPELINE_STATE_RUNNING
CustomContainerTrainingJob projects/902191593306/locations/us-central1/trainingPipelines/1909360628066156544 current state:
PipelineState.PIPELINE_STATE_RUNNING
CustomContainerTrainingJob projects/902191593306/locations/us-central1/trainingPipelines/1909360628066156544 current state:
PipelineState.PIPELINE_STATE_RUNNING
CustomContainerTrainingJob projects/902191593306/locations/us-central1/trainingPipelines/1909360628066156544 current sta

## Vertex Job / Logging

The Kedro pipeline running as a Vertex AI custom job:

![](./images/vertex-custom-job.png)

Logging

![](./images/vertex-job-logging.png)