# Training in Vertex Pipelines

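In this notebook, we define training and evaluation components, compile them into a pipeline, and run that pipeline on Vertex Pipelines.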

In [None]:
# Create the local directory that receives the generated component YAML files
# and the compiled pipeline JSON (`-p` makes this a no-op if it already exists).
! mkdir -p _artifacts

In [None]:
# Region used both as the prefix of the Docker registry host for the component
# base images and as the location in which the pipeline job is run.
GCP_REGION = "europe-west3"

# TODO: Enter your name here. We'll use this to tag your unique
#       Docker image to avoid clashing with other people.
USER_NAME = "julian"

In [None]:
# Run the `docker-push` make target from the parent directory, passing along
# the USER_NAME defined above.
# NOTE(review): presumably this builds and pushes the `fancy-fashion-<USER_NAME>`
# image used as base image by the components below - confirm in the Makefile.
! make -C ../ USER_NAME=$USER_NAME docker-push

In [None]:
from typing import Optional, NamedTuple

import kfp
from kfp import components
from kfp.v2 import compiler
from kfp.v2.dsl import (
    component,
    Input,
    InputPath,
    OutputPath,
    Output,
    Dataset,
    Metrics,
    Model
)

@component(
    base_image=f"{GCP_REGION}-docker.pkg.dev/gdd-cb-vertex/docker/fancy-fashion-{USER_NAME}",
    output_component_file="_artifacts/train.yaml",
)
def train(train_data_path: str, model: Output[Model]) -> None:
    """Fit a model on the training data and store it as ``model.pkl``.

    Args:
        train_data_path: Location of the training data. It is passed through
            ``local_gcs_path`` before being handed to ``train_model``.
        model: Output artifact; ``model.path`` is used as the directory that
            receives the serialized model.
    """
    # Everything the body needs is imported here rather than at the top of the
    # notebook, so that the function is self-contained when it runs as a
    # component inside the base image.
    from pathlib import Path
    import joblib

    from fancy_fashion.model import train_model
    from fancy_fashion.util import local_gcs_path

    local_train_path = local_gcs_path(train_data_path)
    fitted_model = train_model(local_train_path)

    # Treat the artifact path as a directory and place the pickle inside it.
    output_dir = Path(model.path)
    output_dir.mkdir(parents=True, exist_ok=True)
    pickle_file = output_dir / "model.pkl"
    joblib.dump(fitted_model, pickle_file)

    
@component(
    base_image=f"{GCP_REGION}-docker.pkg.dev/gdd-cb-vertex/docker/fancy-fashion-{USER_NAME}",
    output_component_file="_artifacts/evaluate.yaml",
)
def evaluate(
    test_data_path: str, model: InputPath("Model"), metrics: Output[Metrics]
) -> NamedTuple("EvalModelOutput", [("roc", float)]):
    """Report the ROC score of a trained model (currently a placeholder).

    Args:
        test_data_path: Location of the test data (not used yet, see TODO).
        model: Path of the trained model artifact (not used yet, see TODO).
        metrics: Output artifact on which the ``roc`` metric is logged.

    Returns:
        A named tuple with a single ``roc`` field holding the same value that
        is logged on ``metrics``.
    """
    # Imported locally so the function body is self-contained when it runs as
    # a component.
    from collections import namedtuple

    # TODO: Implement the actual evaluation.
    #       Tip: we can use the evaluate_model function from our package.
    roc = 0.9  # Placeholder value until the real evaluation is implemented.
    metrics.log_metric("roc", roc)

    # The signature declares an ``EvalModelOutput`` return value; return it so
    # the declared ``roc`` output is actually produced instead of ``None``.
    eval_output = namedtuple("EvalModelOutput", ["roc"])
    return eval_output(roc=roc)

    
# Derive the pipeline name from USER_NAME (instead of a hard-coded user) so it
# stays in sync with the Docker image tag and the job display name.
@kfp.dsl.pipeline(name=f"fancy-fashion-{USER_NAME}")
def pipeline(train_path: str):
    """Define the pipeline, which currently consists of the training step only.

    Args:
        train_path: Location of the training data, forwarded to the ``train``
            component as its ``train_data_path`` input.
    """
    # Pass the input by keyword so the wiring to the component's parameter is
    # explicit. The task handle is kept for the evaluate step in the TODO below.
    train_task = train(train_data_path=train_path)

    # TODO: Add an evaluate task that uses the evaluate component above.

# Compile the pipeline function into a JSON file; this file is what
# gets submitted as the job template further down.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path="_artifacts/pipeline.json",
)

In [None]:
from google.cloud.aiplatform.pipeline_jobs import PipelineJob

# Describe the pipeline run: which compiled template to use, the value of the
# pipeline's `train_path` parameter, and the pipeline root for the run.
job = PipelineJob(
    display_name=f"fancy-fashion-{USER_NAME}",
    # Caching is switched off for this job.
    enable_caching=False,
    template_path="_artifacts/pipeline.json",
    parameter_values={
        "train_path": "gs://gdd-cb-vertex-fashion-inputs/train"
    },
    # Plain string: there is nothing to interpolate here.
    pipeline_root="gs://gdd-cb-vertex-fashion-artifacts/pipelines",
    location=GCP_REGION,
)

# Run the job using the given service account.
job.run(
    service_account="vmd-fashion@gdd-cb-vertex.iam.gserviceaccount.com"
)