### RUN THIS NOTEBOOK IN A KUBEFLOW JUPYTER NOTEBOOK

In [5]:

import kfp.dsl as dsl
import yaml
from kubernetes import client as k8s
import kfp.gcp as gcp
from kfp import components
from string import Template
import json
from kubernetes import client as k8s_client


@dsl.pipeline(
    name='breast cancer pipeline',
    description='End to End pipeline for Tensorflow Breast Cancer'
)
def breast_cancer_tensorflow_pipeline(
        dataextraction_step_image="gcr.io/<$PROJECT_ID>/breast_cancer/step1_loadingdata:v1",
        dataprocessing_step_image="gcr.io/<$PROJECT_ID>/breast_cancer/step2_dataprocessing:v1",
        trainmodel_step_image="gcr.io/<$PROJECT_ID>/breast_cancer/step3_training_model:v1",
        evaluator_step_image="gcr.io/<$PROJECT_ID>/breast_cancer/step4_evaluation_model:v1",
        train_file='/mnt/training.data',
        data_file="/mnt/breast.data",
        test_file='/mnt/test.data',
        validation_file="/mnt/validation.data",
        split_size=0.2,
        train_target="/mnt/trainingtarget.data",
        test_target="/mnt/testtarget.data",
        validation_target="/mnt/validationtarget.data",
        epochs=5,
        learning_rate=.001,
        batch_size=64,
        shuffle_size=1000,
        tensorboard_logs="/mnt/logs/",
        tensorboard_gcs_logs="gs://<$BUCKET>/breast/logs",
        model_output_base_path="/mnt/saved_model",
        gcs_path="gs://<$BUCKET>/breast/model",
        gcs_path_confusion="gs://<$BUCKET>/breast",
        mode="gcs",
        probability=0.5,
        serving_name="kfserving-breast-model",
        serving_namespace="kubeflow",
        image="gcr.io/<$PROJECT_ID>/breast_cancer/custom_serving_breastcancer:v1",
        bucket_data="gs://kubeflowusecases/breast/data.csv",
        bucket_name="gs://kubeflowusecases",
        commit_sha="breast/visualize",
        metrics_plot="/mnt/correlation.png",
):
    """Declare the breast-cancer pipeline: extract, preprocess, train, evaluate, serve.

    The pipeline creates a 1Gi ReadWriteOnce volume ("my-pvc") and mounts it
    at ``/mnt`` in four container steps. Each step receives the volume from
    the previous one, which also chains them in order:

    1. ``data_extraction``  runs ``/app/dataextract.py``
    2. ``data_processing``  runs ``/app/preprocessing.py``
    3. ``train_model``      runs ``/app/train.py``
    4. ``evaluation_model`` runs ``/app/evaluator.py``

    After evaluation, a ``serve`` resource step applies an InferenceService
    manifest that points at a custom predictor container.

    Every parameter is forwarded as-is to the step(s) that use it; what the
    scripts do with the values is defined inside the step images, not here.

    Args:
        dataextraction_step_image: Image for the ``data_extraction`` step.
        dataprocessing_step_image: Image for the ``data_processing`` step.
        trainmodel_step_image: Image for the ``train_model`` step.
        evaluator_step_image: Image for the ``evaluation_model`` step.
        train_file: Passed as ``--train-file`` (processing, training).
        data_file: Passed as ``--data-file`` (extraction, processing).
        test_file: Passed as ``--test-file`` (processing, training,
            evaluation).
        validation_file: Passed as ``--validation-file`` (processing,
            training).
        split_size: Passed as ``--split-size`` (processing).
        train_target: Passed as ``--train-target`` (processing, training).
        test_target: Passed as ``--test-target`` (processing, training,
            evaluation).
        validation_target: Passed as ``--validation-target`` (processing,
            training).
        epochs: Passed as ``--epochs`` (training).
        learning_rate: Passed as ``--learning-rate`` (training).
        batch_size: Passed as ``--batch-size`` (training).
        shuffle_size: Currently not forwarded to any step; kept so existing
            callers that pass it keep working.
        tensorboard_logs: Passed as ``--tensorboard-logs`` (training).
        tensorboard_gcs_logs: Passed as ``--tensorboard-gcs-logs``
            (training).
        model_output_base_path: Passed as ``--model-output-base-path``
            (training, evaluation).
        gcs_path: Passed as ``--gcs-path`` (training, evaluation).
        gcs_path_confusion: Passed as ``--gcs-path-confusion`` (evaluation).
        mode: Passed as ``--mode`` (training).
        probability: Passed as ``--probability`` (evaluation).
        serving_name: ``metadata.name`` of the InferenceService.
        serving_namespace: ``metadata.namespace`` of the InferenceService.
        image: Container image of the custom predictor.
        bucket_data: Passed as ``--bucket-data`` (processing).
        bucket_name: Passed as ``--bucket-name`` (processing).
        commit_sha: Passed as ``--commit-sha`` (processing).
        metrics_plot: Passed as ``--metrics-plot`` (processing).

    NOTE(review): the ``<$PROJECT_ID>`` / ``<$BUCKET>`` fragments in the
    defaults look like placeholders to be replaced before running -- confirm.
    """
    # PVC: PersistentVolumeClaim shared by all container steps.
    vop = dsl.VolumeOp(
        name='my-pvc',
        resource_name="my-pvc",
        modes=dsl.VOLUME_MODE_RWO,
        size="1Gi"
    )

    # Step 1: data extraction. First consumer of the freshly created volume.
    data_extraction_step = dsl.ContainerOp(
        name='data_extraction',
        image=dataextraction_step_image,
        command="python",
        arguments=[
            "/app/dataextract.py",
            "--data-file", data_file,
        ],
        pvolumes={"/mnt": vop.volume}
    ).apply(gcp.use_gcp_secret("user-gcp-sa"))

    # Step 2: preprocessing. Taking the volume from the previous step's
    # `.pvolume` is what orders this step after extraction.
    data_processing_step = dsl.ContainerOp(
        name='data_processing',
        image=dataprocessing_step_image,
        command="python",
        arguments=[
            "/app/preprocessing.py",
            "--train-file", train_file,
            "--test-file", test_file,
            "--validation-file", validation_file,
            "--data-file", data_file,
            "--split-size", split_size,
            "--train-target", train_target,
            "--test-target", test_target,
            "--validation-target", validation_target,
            "--bucket-data", bucket_data,
            "--bucket-name", bucket_name,
            "--commit-sha", commit_sha,
            "--metrics-plot", metrics_plot,
        ],
        file_outputs={
            "mlpipeline-ui-metadata": "/mlpipeline-ui-metadata.json",
        },
        pvolumes={"/mnt": data_extraction_step.pvolume}
    ).apply(gcp.use_gcp_secret("user-gcp-sa"))

    # Step 3: model training.
    train_model_step = dsl.ContainerOp(
        name='train_model',
        image=trainmodel_step_image,
        command="python",
        arguments=[
            "/app/train.py",
            "--train-file", train_file,
            "--test-file", test_file,
            "--validation-file", validation_file,
            "--train-target", train_target,
            "--test-target", test_target,
            "--validation-target", validation_target,
            "--epochs", epochs,
            "--batch-size", batch_size,
            "--learning-rate", learning_rate,
            "--tensorboard-logs", tensorboard_logs,
            "--tensorboard-gcs-logs", tensorboard_gcs_logs,
            "--model-output-base-path", model_output_base_path,
            "--gcs-path", gcs_path,
            "--mode", mode,
        ],
        file_outputs={
            "mlpipeline-ui-metadata": "/mlpipeline-ui-metadata.json",
        },
        pvolumes={"/mnt": data_processing_step.pvolume}
    ).apply(gcp.use_gcp_secret("user-gcp-sa"))

    # Step 4: model evaluation. Exposes both a metrics file and a UI
    # metadata file as step outputs.
    evaluation_model_step = dsl.ContainerOp(
        name='evaluation_model',
        image=evaluator_step_image,
        command="python",
        arguments=[
            "/app/evaluator.py",
            "--test-file", test_file,
            "--test-target", test_target,
            "--probability", probability,
            "--model-output-base-path", model_output_base_path,
            "--gcs-path", gcs_path,
            "--gcs-path-confusion", gcs_path_confusion,
        ],
        file_outputs={
            "mlpipeline-metrics": "/mlpipeline-metrics.json",
            "mlpipeline-ui-metadata": "/mlpipeline-ui-metadata.json",
        },
        pvolumes={"/mnt": train_model_step.pvolume}
    ).apply(gcp.use_gcp_secret("user-gcp-sa"))

    # InferenceService manifest, built directly as a dict. Substituting the
    # values into JSON text and re-parsing it would break (or silently change
    # the manifest) for any value containing a quote or backslash.
    kfservingdeployment = {
        "apiVersion": "serving.kubeflow.org/v1alpha2",
        "kind": "InferenceService",
        "metadata": {
            "labels": {
                "controller-tools.k8s.io": "1.0",
            },
            "name": str(serving_name),
            "namespace": str(serving_namespace),
        },
        "spec": {
            "default": {
                "predictor": {
                    "custom": {
                        "container": {
                            "image": str(image),
                        },
                    },
                },
            },
        },
    }

    # Step 5: apply the manifest; the step succeeds once `status.url` is set
    # on the resource.
    serve = dsl.ResourceOp(
        name="serve",
        k8s_resource=kfservingdeployment,
        action="apply",
        success_condition="status.url"
    )
    # The serve step shares no volume with the others, so order it explicitly.
    serve.after(evaluation_model_step)
    
    
if __name__ == '__main__':
    from kfp import compiler

    # Both names are deliberately left at module level: the run-submission
    # cell further down reads `pipeline_func` and `pipeline_filename`.
    pipeline_func = breast_cancer_tensorflow_pipeline
    pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'

    # Compile the pipeline function into a package file named after it.
    pipeline_compiler = compiler.Compiler()
    pipeline_compiler.compile(pipeline_func, pipeline_filename)

In [None]:
import kfp
from kfp import compiler
import kfp.components as comp
import kfp.dsl as dsl
from kfp import gcp
# Name of the experiment that pipeline runs are grouped under.
EXPERIMENT_NAME = 'Breast_Cancer'
client = kfp.Client()

# Reuse the experiment if the lookup succeeds, otherwise create it.
try:
    experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)
except Exception:
    # `Exception` instead of a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate and the cell can be interrupted.
    # NOTE(review): presumably the lookup raises when the experiment does not
    # exist yet -- confirm the exact exception type for the installed kfp
    # version and narrow this further if possible.
    experiment = client.create_experiment(EXPERIMENT_NAME)

print(experiment)

In [None]:
# No parameter overrides are passed with the run.
arguments = {}

# Relies on `pipeline_func`, `pipeline_filename`, `client` and `experiment`
# defined by the earlier cells.
run_name = pipeline_func.__name__ + 'breast_run'
run_result = client.run_pipeline(
    experiment.id,
    run_name,
    pipeline_filename,
    arguments,
)

# Echo what was submitted, one value per line.
for submitted_value in (experiment.id, run_name, pipeline_filename, arguments):
    print(submitted_value)