In [1]:
!pip install google-cloud-pipeline-components==0.1.4
!pip install google-cloud-aiplatform==1.3.0
!pip install



In [88]:
!pip install kfp==1.8.2

Collecting kfp==1.8.2
  Downloading kfp-1.8.2.tar.gz (248 kB)
[K     |████████████████████████████████| 248 kB 7.2 MB/s eta 0:00:01
Collecting kfp-pipeline-spec<0.2.0,>=0.1.10
  Downloading kfp_pipeline_spec-0.1.11-py3-none-any.whl (18 kB)
Collecting pydantic<2,>=1.8.2
  Downloading pydantic-1.8.2-cp37-cp37m-manylinux2014_x86_64.whl (10.1 MB)
[K     |████████████████████████████████| 10.1 MB 57.6 MB/s eta 0:00:01
[?25hCollecting typing-extensions<4,>=3.10.0.2
  Downloading typing_extensions-3.10.0.2-py3-none-any.whl (26 kB)
Building wheels for collected packages: kfp
  Building wheel for kfp (setup.py) ... [?25ldone
[?25h  Created wheel for kfp: filename=kfp-1.8.2-py3-none-any.whl size=345475 sha256=38a5fc36ec3eb3d8567fc3d0a0450b70f549a3ed3794e368747bbe6318d3c4aa
  Stored in directory: /home/jupyter/.cache/pip/wheels/06/ba/f5/94df72b9dbfc105deb0040be92880d329ace5a0011276c81ab
Successfully built kfp
Installing collected packages: typing-extensions, pydantic, kfp-pipeline-spec, kfp


In [12]:
import kfp
from datetime import datetime
from kfp.v2 import compiler
from kfp.v2.google.client import AIPlatformClient
from google.cloud import aiplatform
from google_cloud_pipeline_components import aiplatform as gcc_aip
import kfp.v2.components as comp

In [13]:
PROJECT_ID=!gcloud config get-value project # returns default project id 
PROJECT_ID=PROJECT_ID[0]
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
BUCKET = "gs://"+PROJECT_ID+'/telco-churn/pipeline'
REGION='europe-west2'

VERSION='v1'
JOB_NAME="telco-churn-job-"+VERSION
MODEL_NAME="telco-churn-model-"+VERSION
DATASET_ID='5848592619210276864'

In [14]:
PROJECT_ID

'myfirstproject-226013'

In [15]:
gcs_csv_path = "gs://{}/data/telco/Telco-Customer-Churn.csv".format(PROJECT_ID)
bq_data_table= 'bq://{}.telco.churn'.format(PROJECT_ID)
bq_split_table= 'bq://{}.telco.churn_'.format(PROJECT_ID)
pipeline_root_path = BUCKET+"/pl-root"
pipeline_path = pipeline_root_path+'/churn_classif_pipeline.json'

In [16]:
from typing import NamedTuple
import os
from kfp.v2 import dsl

from kfp.v2.dsl import (
    component,
    Output,
    ClassificationMetrics,
    Metrics,
    HTML,
    Markdown,
    Input,
    Model,
)

_KFP_PACKAGE_PATH = os.getenv('KFP_PACKAGE_PATH')

@component(
    packages_to_install=['sklearn'],
    base_image='python:3.8',
    kfp_package_path=_KFP_PACKAGE_PATH,
)
def produce_metrics(model: Input[Model], metrics: Output[ClassificationMetrics], accuracy: Output[Metrics]):
    
    import logging

    print("We are here")
    print(vars(model))
    print("end")
    
    logging.info(model.uri)
    logging.info(model.path)
    logging.info(vars(model))
    
    accuracy.log_metric('accuracy', (86.0))
    
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import roc_curve
    from sklearn.datasets import load_wine
    from sklearn.model_selection import train_test_split, cross_val_predict
    from sklearn.metrics import confusion_matrix

    X, y = load_wine(return_X_y=True)
    # Binary classification problem for label 1.
    y = y == 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    rfc = RandomForestClassifier(n_estimators=10, random_state=42)
    rfc.fit(X_train, y_train)
    y_scores = cross_val_predict(rfc, X_train, y_train, cv=3, method='predict_proba')
    y_predict = cross_val_predict(rfc, X_train, y_train, cv=3, method='predict')
    fpr, tpr, thresholds = roc_curve(y_true=y_train, y_score=y_scores[:,1], pos_label=True)
    metrics.log_roc_curve(fpr, tpr, thresholds)
    
    predictions = rfc.predict(X_test)
    
    metrics.log_confusion_matrix(
        ['a', 'b'],
        confusion_matrix(y_test, predictions).tolist() # .tolist() to convert np array to list.
    )


#produce_metrics_op = create_component_from_func(
#    produce_metrics,
#    base_image='python:3.7',
#    packages_to_install=[],
#    output_component_file='component.yaml',
#)

In [17]:
@kfp.dsl.pipeline(name="{}".format(JOB_NAME),
    pipeline_root=pipeline_root_path)
def pipeline(project_id: str, 
             location: str):
        
    dataset_create_op = gcc_aip.TabularDatasetCreateOp(
        project=project_id, 
        display_name="churn-pred", 
        #gcs_source=gcs_csv_path,
        bq_source=bq_data_table,
        location=location
    )
    
    training_op = gcc_aip.CustomPythonPackageTrainingJobRunOp(
        project=project_id,
        display_name="train_churn_prediction_{}".format(VERSION),
        python_package_gcs_uri="gs://{}/telco-churn/dist/trainer-0.1.tar.gz".format(PROJECT_ID),
        python_module="trainer.task",
        model_description="Churn prediction model",
        container_uri='eu.gcr.io/cloud-aiplatform/training/scikit-learn-cpu.0-23:latest', 
        model_serving_container_image_uri='eu.gcr.io/cloud-aiplatform/prediction/sklearn-cpu.0-23:latest',
        dataset=dataset_create_op.outputs["dataset"],
        model_display_name="myChurnModel{}".format(VERSION),
        base_output_dir="{}/assets-{}".format(BUCKET, VERSION),
        staging_bucket="{}/staging".format(BUCKET),
        bigquery_destination="bq://{}".format(PROJECT_ID),
        machine_type= 'n1-standard-2',
        training_fraction_split = 0.8,
        validation_fraction_split = 0.1,
        test_fraction_split = 0.1,
        location=location
    )

    deploy_op = gcc_aip.ModelDeployOp(  
        model=training_op.outputs["model"],
        project=project_id,
        machine_type="n1-standard-4",
        location=location
    )
    
    produce_metrics(training_op.outputs["model"]).set_display_name('testing-it')#.after(training_op)

In [18]:
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path='/tmp/churn_classif_pipeline.json')

In [19]:
from google.cloud import storage
storage_client = storage.Client()
bucket = storage_client.bucket(PROJECT_ID)
blob = bucket.blob('/'.join(pipeline_path.split('/')[3:]))

blob.upload_from_filename('/tmp/churn_classif_pipeline.json')

In [20]:
""" 
from kfp.v2.google.client import AIPlatformClient
api_client = AIPlatformClient(project_id=PROJECT_ID, region=REGION)
response = api_client.create_run_from_job_spec(
    'churn_classif_pipeline.json',
    pipeline_root=pipeline_root_path,
    parameter_values={
        'project_id': project_id
    })
    
"""

from google.cloud.aiplatform.pipeline_jobs import PipelineJob

pl = PipelineJob(display_name= JOB_NAME,
        template_path= pipeline_path,
        location=REGION,
        parameter_values={'project_id': PROJECT_ID, 
                          'location': REGION})

pl.run(sync=False)

INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/478111835512/locations/europe-west2/pipelineJobs/telco-churn-job-v1-20210923164109
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/478111835512/locations/europe-west2/pipelineJobs/telco-churn-job-v1-20210923164109')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/europe-west2/pipelines/runs/telco-churn-job-v1-20210923164109?project=478111835512


In [21]:
pl.state

<PipelineState.PIPELINE_STATE_PENDING: 2>

INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/478111835512/locations/europe-west2/pipelineJobs/telco-churn-job-v1-20210923164109 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/478111835512/locations/europe-west2/pipelineJobs/telco-churn-job-v1-20210923164109 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/478111835512/locations/europe-west2/pipelineJobs/telco-churn-job-v1-20210923164109 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/478111835512/locations/europe-west2/pipelineJobs/telco-churn-job-v1-20210923164109 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/478111835512/locations/europe-west2/pipelineJobs/telco-churn-job-v1-20210923164109 current state:
PipelineState.PIPELINE_STATE_RUNNING


In [66]:

from kfp.v2.google.client import AIPlatformClient
api_client = AIPlatformClient(project_id=PROJECT_ID,
                           region='europe-west3')
response = api_client.create_schedule_from_job_spec(
    '/tmp/churn_classif_pipeline.json',
    
    pipeline_root=pipeline_root_path,
    schedule="*/5 * * * *",
    parameter_values={
        'project_id': PROJECT_ID
    })

INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/discovery/v1/apis/serviceusage/v1/rest
INFO:googleapiclient.discovery:URL being requested: POST https://serviceusage.googleapis.com/v1/projects/myfirstproject-226013/services/cloudfunctions.googleapis.com:enable?alt=json
INFO:googleapiclient.discovery:URL being requested: POST https://serviceusage.googleapis.com/v1/projects/myfirstproject-226013/services/cloudscheduler.googleapis.com:enable?alt=json
INFO:googleapiclient.discovery:URL being requested: POST https://serviceusage.googleapis.com/v1/projects/myfirstproject-226013/services/appengine.googleapis.com:enable?alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/discovery/v1/apis/cloudfunctions/v1/rest
INFO:googleapiclient.discovery:URL being requested: GET https://cloudfunctions.googleapis.com/v1/projects/myfirstproject-226013/locations/europe-west3/functions/templated_http_request-v1?alt=json
INFO:googleapicli