In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import os

from datetime import datetime
import pytz

from kfp import compiler
from google.cloud import aiplatform

from training_pipeline import pipeline

In [None]:
from dotenv import load_dotenv

# Load key/value pairs from a local .env file (if one is found) into
# os.environ. With its default settings python-dotenv does NOT override
# variables that already exist in the process environment, so a value exported
# by the shell (e.g. the login USER on Unix-like systems) takes precedence over
# the one written in .env.
load_dotenv()

# Project variables (each is None when the variable is not set anywhere):
PROJECT_ID = os.getenv('PROJECT_ID')
BUCKET_NAME = os.getenv('BUCKET_NAME')
USER = os.getenv('USER')
LOCATION = os.getenv('LOCATION')
SERVICE_ACCOUNT = os.getenv('SERVICE_ACCOUNT')

# Fail fast: BUCKET_NAME is interpolated into gs:// URIs further down. An unset
# value would silently become the literal bucket name "None" ("gs://None/...")
# and an empty one would yield "gs:///...", both of which only surface later as
# confusing storage errors.
if not BUCKET_NAME:
    raise RuntimeError(
        "BUCKET_NAME is not set: define it in the environment or in a .env "
        "file before running this notebook."
    )

In [None]:
# Compile the imported `pipeline` definition into a YAML file in the current
# working directory; the job submission below reads this same file through
# `template_path`.
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(pipeline, package_path='pipeline.yaml')

In [None]:
# ---------------------------------------------------------------------------
# Job parameters
# ---------------------------------------------------------------------------
# The display name embeds the current US/Eastern wall-clock time, so every
# submission is labelled with the moment it was created.
tz = pytz.timezone("US/Eastern")
now_eastern = datetime.now(tz)
EXECUTION_TS = now_eastern.strftime("%Y-%m-%d %H:%M:%S")
DISPLAY_NAME = f"pipeline-log-reg-{EXECUTION_TS}"
PIPELINE_ROOT = f"gs://{BUCKET_NAME}/04_deployment/pipeline_root"

# ---------------------------------------------------------------------------
# Pipeline input parameters
# ---------------------------------------------------------------------------
# Location of the Telco customer-churn CSV inside the project bucket.
data_gcs_uri = f"gs://{BUCKET_NAME}/02_churn_prediction/WA_Fn-UseC_-Telco-Customer-Churn.csv"

# Feature column names, split by kind, and the name of the target column.
numerical = ['tenure', 'monthlycharges']
categorical = [
    'gender', 'seniorcitizen', 'partner', 'dependents',
    'phoneservice', 'multiplelines', 'internetservice',
    'onlinesecurity', 'onlinebackup', 'deviceprotection', 'techsupport',
    'streamingtv', 'streamingmovies',
    'contract', 'paperlessbilling', 'paymentmethod',
]
label = "churn"

# Values bound to the pipeline's declared input parameters.
pipeline_parameters = {
    "project_id": PROJECT_ID,
    "data_gcs_uri": data_gcs_uri,
    "cat_features": categorical,
    "num_features": numerical,
    "label": label,
}

# ---------------------------------------------------------------------------
# Running
# ---------------------------------------------------------------------------
# Build the job from the compiled YAML spec and tag it with the submitting user.
job = aiplatform.PipelineJob(
    display_name=DISPLAY_NAME,
    template_path='pipeline.yaml',
    pipeline_root=PIPELINE_ROOT,
    parameter_values=pipeline_parameters,
    enable_caching=True,
    labels={"user": USER},
    project=PROJECT_ID,
    location=LOCATION,
)

# Submit the job so that it runs under the configured service account.
job.submit(service_account=SERVICE_ACCOUNT)

Creating PipelineJob
PipelineJob created. Resource name: projects/532579765435/locations/us-central1/pipelineJobs/pipeline-log-reg-20231015215155
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/532579765435/locations/us-central1/pipelineJobs/pipeline-log-reg-20231015215155')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/pipeline-log-reg-20231015215155?project=532579765435


In [None]:
# get_pipeline_df() accepts no project/location arguments: it resolves them
# from the SDK's global defaults. Set those explicitly so the lookup targets
# the same project and region the job above was submitted to, instead of
# whatever the ambient defaults happen to be. (None values leave the existing
# defaults untouched.)
aiplatform.init(project=PROJECT_ID, location=LOCATION)

# One row per run of the "pipeline-log-reg" pipeline, with the run's input
# parameters (param.*) and logged metrics (metric.*) as columns.
# NOTE(review): job.submit() appears to return without waiting for the run to
# finish, so on a straight "Run All" the newest run may not have metrics yet;
# call job.wait() first if the fresh run's metrics are needed -- confirm.
pipeline_df = aiplatform.get_pipeline_df(pipeline="pipeline-log-reg")

In [None]:
# Display only the first row of the runs table (its parameters and metrics).
pipeline_df.head(n=1)

Unnamed: 0,pipeline_name,run_name,param.input:num_features,param.input:cat_features,param.input:project_id,param.input:data_gcs_uri,param.input:label,param.vertex-ai-pipelines-artifact-argument-binding,metric.auc,metric.mean_auc,metric.std_auc
0,pipeline-log-reg,pipeline-log-reg-20231015215155,"[tenure, monthlycharges]","[gender, seniorcitizen, partner, dependents, p...",dz-learning-d,gs://dz-d-stg-us-ml-zoomcamp/02_churn_predicti...,churn,{'output:evaluate-out_metrics': ['projects/532...,0.856645,0.839588,0.00673


For more information on how to log and compare metrics, see the [metrics visualization and run comparison notebook](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/pipelines/metrics_viz_run_compare_kfp.ipynb).