### YAML files

In [3]:
%%bash

# Emit the component spec for the data_ingest step into
# data_ingest_component.yaml (an existing file is overwritten).
# Because the heredoc delimiter is unquoted, the shell substitutes
# ${IMAGE_URI} into the `image:` field before the file is written.
IMAGE_URI="gcr.io/churn-smu/churn-data-digestion:latest"

cat <<EOF > data_ingest_component.yaml

name: data_ingest
description: Download the dataset from GCS to pass to next component
inputs:
- {name: datapath, type: String}
outputs:
- {name: dataset, type: Dataset}
implementation:
  container:
    image: ${IMAGE_URI}
    command:
    - python
    - data_ingest.py
    args:
    - --datapath
    - {inputValue: datapath}
    - --dataset
    - {outputUri: dataset}
EOF

In [5]:
%%bash

# Write the component spec for the data_validation step to
# data_validation_component.yaml (an existing file is overwritten).
# The heredoc delimiter is unquoted, so the shell expands $GCR_IMAGE into
# the `image:` field before the file is written.
#
# The value of the --dataset flag is bound to the declared output
# `output_data`. The spec declares no output named `dataset`, so a
# placeholder referencing that name could not be resolved.
GCR_IMAGE="gcr.io/churn-smu/churn-data-tfdv:latest"

cat > data_validation_component.yaml <<HERE

name: data_validation
description: Data Validation
inputs:
- {name: input_data, type: Dataset}
- {name: project_id, type: String}
- {name: gcs_temp_location, type: String}
- {name: region, type: String}
- {name: gcs_staging_location, type: String}
- {name: bucket, type: String}
outputs:
- {name: output_data, type: Dataset}
implementation:
  container:
    image: $GCR_IMAGE
    command:
    - python
    - tfdv.py
    args:
    - --input_data
    - {inputUri: input_data}
    - --dataset
    - {outputUri: output_data}
    - --project_id
    - {inputValue: project_id}
    - --gcs_temp_location
    - {inputValue: gcs_temp_location}
    - --region
    - {inputValue: region}
    - --gcs_staging_location
    - {inputValue: gcs_staging_location}
    - --bucket
    - {inputValue: bucket}
HERE

In [5]:
### YAML for data imputation

%%bash

# Write the component spec for the data_impute step to
# data_impute_component.yaml (an existing file is overwritten).
# The heredoc delimiter is unquoted, so the shell expands $GCR_IMAGE into
# the `image:` field before the file is written.
#
# The description states what this step does (imputation) rather than
# repeating the data_ingest wording.
GCR_IMAGE="gcr.io/churn-smu/churn-data-impute-store:latest"

cat > data_impute_component.yaml <<HERE

name: data_impute
description: Impute missing values in the dataset and store the result to pass to next component
inputs:
- {name: pre_impute_dataset, type: Dataset}
- {name: bucket, type: String}
outputs:
- {name: post_impute_dataset, type: Dataset}
implementation:
  container:
    image: $GCR_IMAGE
    command:
    - python
    - impute_and_store.py
    args:
    - --pre_impute_dataset
    - {inputUri: pre_impute_dataset}
    - --bucket
    - {inputValue: bucket}
    - --post_impute_dataset
    - {outputUri: post_impute_dataset}
HERE

In [7]:
### YAML for data scaling and encoding

%%bash

# Emit the component spec for the data_encoding_scaling_store step into
# data_enc_scl_store_component.yaml (an existing file is overwritten).
# Because the heredoc delimiter is unquoted, the shell substitutes
# ${IMAGE_URI} into the `image:` field before the file is written.
IMAGE_URI="gcr.io/churn-smu/churn-data-enc-scl-store:latest"

cat <<EOF > data_enc_scl_store_component.yaml

name: data_encoding_scaling_store
description: Fit transform OneHotEncoder and StandardScaler and upload model artifacts to GCS
inputs:
- {name: pre_enc_dataset, type: Dataset}
- {name: bucket_name, type: String}
outputs:
- {name: post_enc_dataset, type: Dataset}
implementation:
  container:
    image: ${IMAGE_URI}
    command:
    - python
    - enc_scl_store.py
    args:
    - --pre_enc_dataset
    - {inputUri: pre_enc_dataset}
    - --bucket_name
    - {inputValue: bucket_name}
    - --post_enc_dataset
    - {outputUri: post_enc_dataset}
EOF

In [9]:
### YAML for hyperparameter tuning

%%bash

# Emit the component spec for the hyperparameter_tuning step into
# hyperparameter_tuning_component.yaml (an existing file is overwritten).
# The spec declares inputs only; it has no `outputs:` section.
# Because the heredoc delimiter is unquoted, the shell substitutes
# ${IMAGE_URI} into the `image:` field before the file is written.
IMAGE_URI="gcr.io/churn-smu/churn-data-hyperparameter_tuning:latest"

cat <<EOF > hyperparameter_tuning_component.yaml

name: hyperparameter_tuning
description: Perform Hyperparameter Tuning and Store Data inside GCS as json
inputs:
- {name: dataset, type: Dataset}
- {name: bucket_name, type: String}
implementation:
  container:
    image: ${IMAGE_URI}
    command:
    - python
    - hyperparameter_tuning.py
    args:
    - --dataset
    - {inputUri: dataset}
    - --bucket_name
    - {inputValue: bucket_name}
EOF

In [11]:
### YAML for data training

%%bash

# Emit the component spec for the training step into
# training_component.yaml (an existing file is overwritten).
# The spec declares four outputs (model, classification_metrics,
# base_metrics, feature_importance), each passed to training.py as a URI.
# Because the heredoc delimiter is unquoted, the shell substitutes
# ${IMAGE_URI} into the `image:` field before the file is written.
IMAGE_URI="gcr.io/churn-smu/churn-data-training:latest"

cat <<EOF > training_component.yaml

name: training
description: Perform training and store artifacts
inputs:
- {name: dataset, type: Dataset}
- {name: bucket_name, type: String}
outputs:
- {name: model, type: Model}
- {name: classification_metrics, type: ClassificationMetrics}
- {name: base_metrics, type: Metrics}
- {name: feature_importance, type: Dataset}
implementation:
  container:
    image: ${IMAGE_URI}
    command:
    - python
    - training.py
    args:
    - --dataset
    - {inputUri: dataset}
    - --bucket_name
    - {inputValue: bucket_name}
    - --model
    - {outputUri: model}
    - --classification_metrics
    - {outputUri: classification_metrics}
    - --base_metrics
    - {outputUri: base_metrics}
    - --feature_importance
    - {outputUri: feature_importance}
EOF

In [2]:
### YAML for model metrics check

%%bash

# Write the component spec for the check_model_metrics step to
# check_model_metrics_component.yaml (an existing file is overwritten).
# The heredoc delimiter is unquoted, so the shell expands $GCR_IMAGE into
# the `image:` field before the file is written.
#
# `deploy` is declared as a String output, i.e. a parameter rather than an
# artifact, so it is wired with `outputPath`; URI placeholders are meant
# for artifact-typed outputs.
# NOTE(review): confirm check_model_metrics.py writes the decision to the
# file path it receives via --deploy.
GCR_IMAGE="gcr.io/churn-smu/churn-data-check_model_metrics:latest"

cat > check_model_metrics_component.yaml <<HERE

name: check_model_metrics
description: Perform model metrics check for deployment
inputs:
- {name: metrics, type: String}
- {name: threshold_dict, type: String}
outputs:
- {name: deploy, type: String}
implementation:
  container:
    image: $GCR_IMAGE
    command:
    - python
    - check_model_metrics.py
    args:
    - --base_metrics
    - {inputValue: metrics}
    - --threshold_dict
    - {inputValue: threshold_dict}
    - --deploy
    - {outputPath: deploy}

HERE

In [3]:
### YAML for model deployment

%%bash

# Write the component spec for the deploy_model step to
# deploy_model_component.yaml (an existing file is overwritten).
# The heredoc delimiter is unquoted, so the shell expands $GCR_IMAGE into
# the `image:` field before the file is written.
#
# `model` is declared as a String input, i.e. a parameter rather than an
# artifact, so its value is passed with `inputValue`; URI placeholders are
# meant for artifact-typed inputs.
# NOTE(review): this assumes callers pass the model location as a string.
# If the pipeline is meant to pass a Model artifact instead, the declared
# type (not the placeholder) is what needs to change - confirm against the
# pipeline definition.
GCR_IMAGE="gcr.io/churn-smu/churn-data-deploy_model:latest"

cat > deploy_model_component.yaml <<HERE

name: deploy_model
description: Deploy
inputs:
- {name: model, type: String}
- {name: project, type: String}
- {name: region, type: String}
- {name: serving_container_image_uri, type: String}
implementation:
  container:
    image: $GCR_IMAGE
    command:
    - python
    - deploy_model.py
    args:
    - --model
    - {inputValue: model}
    - --project
    - {inputValue: project}
    - --region
    - {inputValue: region}
    - --serving_container_image_uri
    - {inputValue: serving_container_image_uri}

HERE

In [4]:
### YAML for endpoint testing

%%bash

# Emit the component spec for the endpoint_test step into
# endpoint_test_component.yaml (an existing file is overwritten).
# The spec declares three String inputs and no outputs; all three are
# passed to endpoint_test.py by value.
# Because the heredoc delimiter is unquoted, the shell substitutes
# ${IMAGE_URI} into the `image:` field before the file is written.
IMAGE_URI="gcr.io/churn-smu/churn-data-endpoint_test:latest"

cat <<EOF > endpoint_test_component.yaml

name: endpoint_test

description: Test endpoint

inputs:
- {name: endpoint, type: String}
- {name: project, type: String}
- {name: region, type: String}

implementation:
  container:
    image: ${IMAGE_URI}
    command:
    - python
    - endpoint_test.py
    args:
    - --project
    - {inputValue: project}
    - --region
    - {inputValue: region}
    - --endpoint
    - {inputValue: endpoint}

EOF