# Building Kubeflow Pipeline

# In [2]:

import kfp.dsl as dsl
import yaml
from kubernetes import client as k8s
import kfp.gcp as gcp
from kfp import components
from string import Template
import json
from kubernetes import client as k8s_client
import kfp.compiler as compiler
from kfp import components


def _build_inference_service(name, namespace, image, storage_uri):
    """Return a KFServing ``InferenceService`` manifest as a plain dict.

    Args:
        name: Value for ``metadata.name``.
        namespace: Value for ``metadata.namespace``.
        image: Image of the custom predictor container.
        storage_uri: Value of the container's ``STORAGE_URI`` env variable.

    Returns:
        A dict suitable for ``dsl.ResourceOp(k8s_resource=...)``.
    """
    return {
        "apiVersion": "serving.kubeflow.org/v1alpha2",
        "kind": "InferenceService",
        "metadata": {
            "labels": {
                "controller-tools.k8s.io": "1.0",
            },
            "name": name,
            "namespace": namespace,
            "annotations": {
                "sidecar.istio.io/inject": "false",
            },
        },
        "spec": {
            "default": {
                "predictor": {
                    "custom": {
                        "container": {
                            "name": "kfserving-container",
                            "image": image,
                            "env": [
                                {
                                    "name": "STORAGE_URI",
                                    "value": storage_uri,
                                },
                            ],
                            "imagePullPolicy": "Always",
                        },
                    },
                },
            },
        },
    }


@dsl.pipeline(
    name='Feature Store Service Kubeflow pipeline',
    description='End to End pipeline for Feature Store training and serving'
)
def feature_store_training_pipeline(
        # Container images of the three Python steps.
        feature_step_image= "gcr.io/<PROJECT_ID>/mlops_world/featureingestion:latest",
        trainmodel_step_image= "gcr.io/<PROJECT_ID>/mlops_world/feastrainingjob:latest",
        evaluator_step_image="gcr.io/<PROJECT_ID>/mlops_world/modelevaluation:latest",

        # Arguments of the feature store step.
        staging_bucket="gs://<STAGING_BUCKET>/",
        target_name="fare_statistics__target",
        data_id="gs://<BUCKET>/driver_id.csv",
        data_source="batch",
        data_features="gs://<BUCKET>/features.json",
        project="<PROJECT_ID>",

        # Arguments of the training step.
        epochs=5,
        batch_size=32,
        tensorboard_gcs_logs="gs://<BUCKET>/taxi/logs",

        # Arguments of the evaluation step.
        gcs_path="gs://<BUCKET>/taxi/model",
        gcs_path_confusion="gs://<BUCKET>/taxi/",
        mode="gcs",
        probability=0.5,

        # Arguments of the serving step.
        serving_name='<MODEL_NAME>',
        serving_namespace="<NAMESPACE>",
        serving_step_image="gcr.io/<PROJECT_ID>/mlops_world/kfservingcustom:latest",
        model_storage_path="gs://<BUCKET>/taxi/model"
    ):
    """Define the feature store -> train -> evaluate -> serve pipeline.

    Four operations are declared: three ``dsl.ContainerOp`` steps that each
    run a Python script from their image, and one ``dsl.ResourceOp`` that
    applies a KFServing ``InferenceService``. A 1Gi ReadWriteOnce volume
    created by ``dsl.VolumeOp`` is mounted at ``/mnt`` in every container
    step.

    Args:
        feature_step_image: Image running ``/app/featurestore_service.py``.
        trainmodel_step_image: Image running ``/app/train.py``.
        evaluator_step_image: Image running ``/app/evaluator.py``.
        staging_bucket: Passed to the feature step as ``--staging-bucket``.
        target_name: Passed to the feature step as ``--target-name``.
        data_id: Passed to the feature step as ``--data-id``.
        data_source: Passed to the feature step as ``--data-source``.
        data_features: Passed to the feature step as ``--data-features``.
        project: Passed to the feature step as ``--project``.
        epochs: Passed to the training step as ``--epochs``.
        batch_size: Passed to the training step as ``--batch-size``.
        tensorboard_gcs_logs: Passed to the training step as
            ``--tensorboard-gcs-logs``.
        gcs_path: Passed to the evaluation step as ``--gcs-path``.
        gcs_path_confusion: Passed to the evaluation step as
            ``--gcs-path-confusion``.
        mode: Passed to the evaluation step as ``--mode``.
        probability: Passed to the evaluation step as ``--probability``.
        serving_name: ``metadata.name`` of the InferenceService.
        serving_namespace: ``metadata.namespace`` of the InferenceService.
        serving_step_image: Image of the custom predictor container.
        model_storage_path: Value of the predictor's ``STORAGE_URI`` env
            variable.
    """
    # PVC : PersistentVolumeClaim volume
    vop = dsl.VolumeOp(
        name='my-pvc',
        resource_name="my-pvc",
        modes=dsl.VOLUME_MODE_RWO,
        size="1Gi"
    )

    # feature store
    feature_store_step = dsl.ContainerOp(
        name='Feature Store Service',
        image=feature_step_image,
        command="python",
        arguments=[
            "/app/featurestore_service.py",
            "--staging-bucket", staging_bucket,
            "--project", project,
            "--target-name", target_name,
            "--data-id", data_id,
            "--data-source", data_source,
            "--data-features", data_features,
        ],
        pvolumes={"/mnt": vop.volume}
    ).apply(gcp.use_gcp_secret("user-gcp-sa"))

    # trainmodel
    # Mounting the previous step's `.pvolume` (rather than `vop.volume`)
    # hands the same volume down the chain of steps.
    train_model_step = dsl.ContainerOp(
        name='Train Historical Data',
        image=trainmodel_step_image,
        command="python",
        arguments=[
            "/app/train.py",
            "--epochs", epochs,
            "--batch-size", batch_size,
            "--tensorboard-gcs-logs", tensorboard_gcs_logs,
        ],
        file_outputs={
            "mlpipeline-ui-metadata": "/mlpipeline-ui-metadata.json"
        },
        pvolumes={"/mnt": feature_store_step.pvolume}
    ).apply(gcp.use_gcp_secret("user-gcp-sa"))

    # evaluationmodel
    evaluation_model_step = dsl.ContainerOp(
        name='evaluation_model',
        image=evaluator_step_image,
        command="python",
        arguments=[
            "/app/evaluator.py",
            "--probability", probability,
            "--gcs-path", gcs_path,
            "--gcs-path-confusion", gcs_path_confusion,
            "--mode", mode
        ],
        file_outputs={
            "mlpipeline-metrics": "/mlpipeline-metrics.json",
            "mlpipeline-ui-metadata": "/mlpipeline-ui-metadata.json"
        },
        pvolumes={"/mnt": train_model_step.pvolume}
    ).apply(gcp.use_gcp_secret("user-gcp-sa"))

    # serving model
    # The manifest is assembled as a dict from the pipeline parameters, so
    # the serving_* / model_storage_path values really end up in the
    # resource and no JSON text has to be parsed. str() turns each pipeline
    # parameter into its placeholder string.
    kfservingdeployment = _build_inference_service(
        name=str(serving_name),
        namespace=str(serving_namespace),
        image=str(serving_step_image),
        storage_uri=str(model_storage_path),
    )

    serve = dsl.ResourceOp(
        name="serve",
        k8s_resource=kfservingdeployment,
        action="apply",
        success_condition="status.url"
    )
    # Only deploy once the evaluation step has finished.
    serve.after(evaluation_model_step)
    
  
    
if __name__ == '__main__':
    # Script entry point: compile the pipeline function into a package file
    # named "<function name>.pipeline.zip".
    import kfp.compiler as compiler

    pipeline_func = feature_store_training_pipeline
    pipeline_filename = f"{pipeline_func.__name__}.pipeline.zip"
    compiler.Compiler().compile(
        pipeline_func,
        pipeline_filename,
    )