# Create a Simple Vertex AI Pipeline for AutoML Image Classification

### Step 1: Install necessary libraries

In [None]:
# Install the libraries this notebook needs: google-cloud-aiplatform, kfp and
# google-cloud-pipeline-components. `--user` installs them for the current
# user rather than system-wide.
!pip3 install --user google-cloud-aiplatform==1.0.0 --upgrade
!pip3 install --user kfp google-cloud-pipeline-components==0.1.1 --upgrade
# Constrain shapely to versions below 2.
# NOTE(review): google-cloud-aiplatform is listed again here without the
# ==1.0.0 pin used above; presumably the shapely constraint is needed for
# compatibility with that SDK version -- confirm, and confirm the pin is
# still honoured after this command.
!pip3 install --user google-cloud-aiplatform "shapely<2"

#### The following code restarts the Kernel

In [None]:
# Restart the notebook kernel after the installs above. The restart is skipped
# when the IS_TESTING environment variable is set to a non-empty value.
import os

if not os.getenv("IS_TESTING"):
    import IPython

    # Passing True asks the kernel to restart rather than just shut down.
    IPython.Application.instance().kernel.do_shutdown(True)

Finally, check that you have correctly installed the packages. **The KFP SDK version should be >=1.6**:

In [None]:
# Print the installed versions of the KFP SDK and of
# google_cloud_pipeline_components. Each check is run through the shell (`!`)
# in its own python3 process, so it reports what a fresh interpreter imports.
!python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"
!python3 -c "import google_cloud_pipeline_components; print('google_cloud_pipeline_components version: {}'.format(google_cloud_pipeline_components.__version__))"

### Step 2: Set your project ID and bucket

You will need a Google Cloud Project ID to run your pipeline, and you will need a Cloud Storage bucket to store pipeline artifacts. 

In [None]:
# Need to change this for your Project ID
PROJECT_ID = "ml-demos-dar" 

# Create a Cloud Storage Bucket and put it's name below
BUCKET_NAME="gs://ml-demos-dar-ai-pipeline"

# Folder is storage where pipeline artifacts will be stored
PIPELINE_ROOT = f"{BUCKET_NAME}/pipeline_root/"
PIPELINE_ROOT

# Make sure jupyter is in you the PATH
PATH=%env PATH
%env PATH={PATH}:/home/jupyter/.local/bin

# Set the Google Cloud Region
REGION="us-central1"

### Step 3: Import libraries

Add the following to import the *libraries* we'll be using throughout this lab:

In [None]:
# Load necessary libraries
import kfp
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.google.client import AIPlatformClient
from google.cloud import aiplatform
from google_cloud_pipeline_components import aiplatform as gcc_aip

print("Imported required libraries")

### Step 4: Define the Pipeline using Google Cloud Components

In [None]:
# Lower-case aliases of the notebook-level settings; the pipeline decorator
# and the run submission refer to these names.
pipeline_root_path = PIPELINE_ROOT
project_id = PROJECT_ID

# Pipeline workflow: create an image dataset, train an AutoML image
# classification model on it, create an endpoint, and deploy the trained model
# to that endpoint. The pipeline takes the Google Cloud project ID as its only
# parameter.
@kfp.dsl.pipeline(
    name="automl-image-training-v2",
    pipeline_root=pipeline_root_path,
)
def pipeline(project_id: str):
    flowers_csv = "gs://cloud-samples-data/vision/automl_classification/flowers/all_data_v2.csv"
    classifier_name = "dougs-flower-classification"

    # Step 1: dataset creation, using a Google Cloud pipeline component.
    # `dataset_task` holds only the definition of the step's output, not the
    # object produced when the step actually executes. That value cannot be
    # read at the dsl.pipeline level; the only way to use it is to hand it to
    # a downstream component as an input.
    dataset_task = gcc_aip.ImageDatasetCreateOp(
        project=project_id,
        display_name="flowers",
        gcs_source=flowers_csv,
        import_schema_uri=aiplatform.schema.dataset.ioformat.image.single_label_classification,
    )

    # Step 2: model training. The dataset output of step 1 is wired in through
    # `dataset_task.outputs["dataset"]`; the outputs of this step are exposed
    # on `training_task`.
    training_task = gcc_aip.AutoMLImageTrainingJobRunOp(
        project=project_id,
        display_name=classifier_name,
        prediction_type="classification",
        model_type="CLOUD",
        base_model=None,
        dataset=dataset_task.outputs["dataset"],
        model_display_name=classifier_name,
        training_fraction_split=0.6,
        validation_fraction_split=0.2,
        test_fraction_split=0.2,
        budget_milli_node_hours=8000,
    )

    # Step 3: create the endpoint that will host the model.
    endpoint_task = gcc_aip.EndpointCreateOp(
        project=project_id,
        display_name="create-endpoint",
    )

    # Step 4: deploy the trained model to the new endpoint. No replica-count
    # arguments are passed, so the component's own defaults apply.
    deploy_task = gcc_aip.ModelDeployOp(
        model=training_task.outputs["model"],
        endpoint=endpoint_task.outputs["endpoint"],
    )

### Step 5: Compile and then Submit the Pipeline to Run

In [None]:
# Compile the pipeline function into a JSON job spec file. The same file name
# is used later when the run is submitted.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path="image_classif_pipeline.json",
)

# The message names the file that was actually written above.
print("The pipeline has been compiled and saved to the file image_classif_pipeline.json")


In [None]:
# Client used to submit the compiled pipeline, bound to the project and
# region configured earlier in the notebook.
aip_client = AIPlatformClient(project_id=PROJECT_ID, region=REGION)

In [None]:
# Submit the compiled job spec as a pipeline run. `parameter_values` supplies
# the pipeline function's `project_id` argument; the call's return value is
# kept in `response`.
response = aip_client.create_run_from_job_spec(
    job_spec_path="image_classif_pipeline.json",
    pipeline_root=pipeline_root_path,
    parameter_values={"project_id": project_id},
)