In [None]:
# --- Environment configuration ------------------------------------------------
# Google Cloud project and region used throughout the notebook.
PROJECT_ID = "prj-d-bu3machine-learning-ma6i"
REGION = "us-central1"

# Cloud Storage bucket: used as the Vertex AI staging bucket and as the prefix
# for the pipeline root.
BUCKET_URI = "gs://bkt-d-vertexbucket"

# Cloud KMS key passed to the pipeline job as its encryption key.
KMS_KEY = (
    "projects/prj-d-kms-3i3k/locations/us-central1"
    "/keyRings/sample-keyring/cryptoKeys/prj-d-bu3machine-learning"
)

# Service account forwarded to the pipeline as its `service_account` parameter.
COMPUTE_ENGINE_SA = "401570045548-compute@developer.gserviceaccount.com"

# VPC network handed to `PipelineJob.submit(network=...)`.
PEER_NETWORK = "projects/316945073583/global/networks/vpc-d-shared-restricted"

In [None]:
DATA_PATH = "data"
KFP_COMPONENTS_PATH = "components"
SRC = "src"
BUILD = "build"
Image = f"{REGION}-docker.pkg.dev/{PROJECT_ID}/pipelinerepo/pipeline_tutorial:latest"

#export GOOGLE_APPLICATION_CREDENTIALS = 
!mkdir -m 777 -p {SRC} {DATA_PATH} {KFP_COMPONENTS_PATH} {BUILD}

In [None]:
# Render Dockerfile.net from Python instead of `%%writefile`: the cell magic
# writes its body verbatim, so `{REGION}` / `{PROJECT_ID}` would end up in the
# FROM line unexpanded and `docker build` would reject the image reference.
dockerfile_net_text = (
    f"FROM {REGION}-docker.pkg.dev/{PROJECT_ID}/pipelinerepo/net-debug\n"
    "RUN pip install kfp==2.5.0\n"
)
with open("Dockerfile.net", "w") as dockerfile:
    dockerfile.write(dockerfile_net_text)

In [None]:
# Render Dockerfile.gcloud from Python instead of `%%writefile`: the cell magic
# writes its body verbatim, so `{REGION}` / `{PROJECT_ID}` would end up in the
# FROM line unexpanded and `docker build` would reject the image reference.
# NOTE(review): the base image is the same `net-debug` image used by
# Dockerfile.net even though the result is tagged as the gcloud image --
# confirm this is the intended base.
dockerfile_gcloud_text = (
    f"FROM {REGION}-docker.pkg.dev/{PROJECT_ID}/pipelinerepo/net-debug\n"
    "RUN pip install kfp==2.5.0\n"
)
with open("Dockerfile.gcloud", "w") as dockerfile:
    dockerfile.write(dockerfile_gcloud_text)

In [None]:
# Artifact Registry references for the two debug images.
# These must be f-strings: as plain strings the `{REGION}` / `{PROJECT_ID}`
# placeholders stay literal and the resulting docker tags and component
# base images are invalid.
NET_DEBUG_IMAGE = f"{REGION}-docker.pkg.dev/{PROJECT_ID}/pipelinerepo/net-debug"
GCLOUD_IMAGE = f"{REGION}-docker.pkg.dev/{PROJECT_ID}/pipelinerepo/gcloud"

In [None]:
!docker build . -f ./Dockerfile.net -t "{NET_DEBUG_IMAGE}"

In [None]:
!docker build . -f ./Dockerfile.gcloud -t "{GCLOUD_IMAGE}"

In [None]:
# Register gcloud as the Docker credential helper for the regional Artifact
# Registry host; `echo Y` answers the confirmation prompt non-interactively.
!echo Y | gcloud auth configure-docker {REGION}-docker.pkg.dev

In [None]:
# Push the network-debug image to the registry named in its tag.
!docker push "{NET_DEBUG_IMAGE}"

In [None]:
# Push the gcloud image to the registry named in its tag.
!docker push "{GCLOUD_IMAGE}"

In [None]:
!pip install protobuf==3.20.* tensorflow==2.8.0  tensorflow-hub==0.13.0 kfp==2.5.0

In [None]:
from pathlib import Path as path
from urllib.parse import urlparse
import os
from six.moves import urllib
import tempfile
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import tensorflow_hub as hub
from google.cloud import aiplatform
from google.cloud import bigquery
from google.api_core.exceptions import GoogleAPIError
from kfp import compiler, dsl
from kfp.dsl import component
from kfp.dsl import Input, Output, Model, Metrics, OutputPath
from typing import NamedTuple

# Configure the Vertex AI SDK with the notebook's project, region and
# staging bucket.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_URI,
)

# Pipeline

In [None]:
@component(base_image=GCLOUD_IMAGE)
def arbitrary() -> str:
    """Run ``gcloud config list`` in the component container and print its output.

    Returns:
        The literal string ``"Finished"``.
    """
    # Imported inside the function body so the component is self-contained.
    import subprocess

    completed = subprocess.run(
        "gcloud config list",
        shell=True,
        capture_output=True,
        text=True,
    )

    print("Command Output:")
    print(completed.stdout)

    # Only report stderr when the command actually wrote something to it.
    error_text = completed.stderr
    if error_text:
        print("Error Output:")
        print(error_text)

    return "Finished"

In [None]:
@component(
        base_image = f"{NET_DEBUG_IMAGE}"
)
def net() -> str:
    """Print basic network diagnostics from inside the component container.

    Runs ``route -n``, ``ifconfig`` and a bounded ``ping`` of 8.8.8.8 in
    sequence, printing each command's stdout and, when non-empty, its stderr.

    Returns:
        The literal string ``"Finished"``.
    """
    # Imported inside the function body so the component is self-contained.
    import subprocess

    # `ping` without `-c` never exits on its own, which would hang the step
    # forever; limit it to four probes. The timeout is a second safety net for
    # any command that blocks (e.g. on a network with no route).
    commands = ("route -n", "ifconfig", "ping -c 4 8.8.8.8")
    timeout_seconds = 60

    for bash_command in commands:
        try:
            result = subprocess.run(
                bash_command,
                shell=True,
                capture_output=True,
                text=True,
                timeout=timeout_seconds,
            )
        except subprocess.TimeoutExpired:
            # Report the timeout and carry on with the remaining diagnostics.
            print("Error Output:")
            print(f"'{bash_command}' timed out after {timeout_seconds}s")
            continue

        print("Command Output:")
        print(result.stdout)

        if result.stderr:
            print("Error Output:")
            print(result.stderr)
    return "Finished"

In [None]:
# Debug pipeline: runs the two diagnostic components (`arbitrary` and `net`).
# None of the declared parameters is read by the body; they are only declared
# on the signature.
# NOTE(review): the defaults below reference TRAINING_URL, EVAL_URL, DATASET_ID,
# TRAINING_TABLE_ID, EVAL_TABLE_ID, JOB_NAME and RUNNER, which are not defined
# in the visible cells -- confirm they exist before this cell runs.
@dsl.pipeline(name="debug-pipeline")
def pipeline(
    create_bq_dataset_query: str,
    project: str,
    deployment_project: str,
    region: str,
    model_dir: str,
    bucket_name: str,
    monitoring_name: str,
    monitoring_email: str,
    encryption: str,
    service_account: str,
    train_data_url: str = TRAINING_URL,
    eval_data_url: str = EVAL_URL,
    bq_dataset: str = DATASET_ID,
    bq_train_table: str = TRAINING_TABLE_ID,
    bq_eval_table: str = EVAL_TABLE_ID,
    job_name: str = JOB_NAME,
    requirements_file_path: str = f'{BUCKET_URI}/requirements.txt',
    python_file_path: str = f'{BUCKET_URI}/src/ingest_pipeline.py',
    dataflow_temp_location: str = f'{BUCKET_URI}/temp_dataflow',
    runner: str = RUNNER,
    lr: float = 0.01,
    epochs: int = 5,
    batch_size: int = 32,
    base_train_dir: str = f'{BUCKET_URI}/training',
    tb_log_dir: str = f'{BUCKET_URI}/tblogs',
    deployment_image: str = "us-docker.pkg.dev/cloud-aiplatform/prediction/tf2-cpu.2-8:latest",
    deployed_model_name: str = 'income_bracket_predictor',
    endpoint_name: str = 'census_endpoint',
    min_nodes: int = 2,
    max_nodes: int = 4,
    traffic_split: int = 25,
):
    # Arbitrary diagnostic containers; their outputs are not consumed.
    arbitrary()
    net()
        

In [None]:
# Compile the pipeline function into the YAML package that the job cell loads.
# NOTE(review): no visible cell creates ./common/vertex-ai-pipeline -- confirm
# the directory exists before running.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path="./common/vertex-ai-pipeline/pipeline_package.yaml",
)

In [None]:
# Build the parameter set for the compiled pipeline and submit it to Vertex AI.
# NOTE(review): TRAINING_URL, EVAL_URL, DATASET_ID, TRAINING_TABLE_ID,
# EVAL_TABLE_ID, JOB_NAME, RUNNER and create_bq_dataset_query are not defined
# in the visible cells -- confirm they exist before this cell runs.
from datetime import datetime

# Day/hour/minute/second suffix so each submission gets a distinct display name.
timestamp = datetime.now().strftime("%d_%H_%M_%S")
pipelineroot = f'{BUCKET_URI}/pipelineroot'
service_account = COMPUTE_ENGINE_SA
#service_account = "notebook-runner@prj-d-bu3machine-learning-ma6i.iam.gserviceaccount.com"

# Source data locations and BigQuery targets.
data_config = {
    "train_data_url": TRAINING_URL,
    "eval_data_url": EVAL_URL,
    "bq_dataset": DATASET_ID,
    "bq_train_table": TRAINING_TABLE_ID,
    "bq_eval_table": EVAL_TABLE_ID,
}

# Dataflow job settings.
dataflow_config = {
    "job_name": JOB_NAME,
    "requirements_file_path": f'{BUCKET_URI}/requirements.txt',
    "python_file_path": f'{BUCKET_URI}/src/ingest_pipeline.py',
    "setup_file_uri": f'{BUCKET_URI}/setup.py',
    "temp_location": f'{BUCKET_URI}/temp_dataflow',
    "runner": RUNNER,
}

# Training hyperparameters and output locations.
train_config = {
    'lr': 0.01,
    'epochs': 5,
    'base_train_dir': f'{BUCKET_URI}/training',
    'tb_log_dir': f'{BUCKET_URI}/tblogs',
}

# Model deployment settings.
deployment_config = {
    'image': "us-docker.pkg.dev/cloud-aiplatform/prediction/tf2-cpu.2-8:latest",
    'model_name': "income_bracket_predictor",
    'endpoint_name': "census_income_endpoint",
    'min_nodes': 2,
    'max_nodes': 4,
    'deployment_project': PROJECT_ID,
    "encryption": KMS_KEY,
    "service_account": service_account,
}

# Monitoring job name and notification address.
monitoring_config = {
    'email': 'ccolin@clsecteam.com',
    'name': 'census_monitoring',
}

# Bound to `pipeline_job` rather than `pipeline`: reusing the name `pipeline`
# would overwrite the @dsl.pipeline function and break any re-run of the
# compile cell after this cell has executed.
pipeline_job = aiplatform.PipelineJob(
    display_name=f"census_income_{timestamp}",
    template_path='./common/vertex-ai-pipeline/pipeline_package.yaml',
    pipeline_root=pipelineroot,
    encryption_spec_key_name=deployment_config.get("encryption"),
    parameter_values={
        "create_bq_dataset_query": create_bq_dataset_query,
        "bq_dataset": data_config['bq_dataset'],
        "bq_train_table": data_config['bq_train_table'],
        "bq_eval_table": data_config['bq_eval_table'],
        "job_name": dataflow_config['job_name'],
        "train_data_url": data_config['train_data_url'],
        "eval_data_url": data_config['eval_data_url'],
        "requirements_file_path": dataflow_config['requirements_file_path'],
        "python_file_path": dataflow_config['python_file_path'],
        "dataflow_temp_location": dataflow_config['temp_location'],
        "runner": dataflow_config['runner'],
        "project": PROJECT_ID,
        "region": REGION,
        "model_dir": f"{BUCKET_URI}",
        # Bucket name without the leading "gs://" scheme.
        "bucket_name": BUCKET_URI[5:],
        "epochs": train_config['epochs'],
        "lr": train_config['lr'],
        "base_train_dir": train_config['base_train_dir'],
        "tb_log_dir": train_config['tb_log_dir'],
        "deployment_image": deployment_config['image'],
        "deployed_model_name": deployment_config["model_name"],
        "endpoint_name": deployment_config["endpoint_name"],
        "min_nodes": deployment_config["min_nodes"],
        "max_nodes": deployment_config["max_nodes"],
        "deployment_project": deployment_config["deployment_project"],
        "encryption": deployment_config.get("encryption"),
        "service_account": deployment_config["service_account"],
        "monitoring_name": monitoring_config['name'],
        "monitoring_email": monitoring_config['email'],
    },
    enable_caching=False,
)

# Submit the job on the peered VPC network.
pipeline_job.submit(network=PEER_NETWORK)
#pipeline_job.run(service_account=service_account)

In [None]:
# Load the SavedModel directly from the bucket URI.
model = tf.saved_model.load(BUCKET_URI)

In [None]:
# Display the signatures exposed by the loaded SavedModel.
model.signatures