In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Pipelines: AutoML을 활용한 Image Classification 모델 학습

#### Flowers Dataset을 활용해서 flower 유형(daisy, dandelion, rose, sunflower, tulip) 중에 어떤 것인지 classification하는 모델 학습

## 설치

필요한 library 설치

In [1]:
! pip3 install --upgrade --quiet google-cloud-aiplatform \
                                 kfp \
                                 google-cloud-pipeline-components==2.4.1 \
                                 google-cloud-storage

## 환경 설정

Project ID와 Region 설정 

In [2]:
# Print the active gcloud configuration (account, project, default regions).
!gcloud config list

[compute]
region = us-central1
[core]
account = 651078960027-compute@developer.gserviceaccount.com
disable_usage_reporting = True
project = inspired-micron-414202
[dataproc]
region = us-central1

Your active configuration is: [default]


In [3]:
# List the Google Cloud projects visible to the active account.
!gcloud projects list

PROJECT_ID              NAME              PROJECT_NUMBER
inspired-micron-414202  My First Project  651078960027


In [4]:
# Project and region referenced by the gcloud/gsutil commands and SDK calls in
# the cells that follow.
PROJECT_ID = "inspired-micron-414202" 
REGION = "us-central1"

# Make PROJECT_ID the default project for subsequent gcloud commands.
! gcloud config set project {PROJECT_ID}


Updated property [core/project].


### - Cloud Storage bucket 생성

dataset이나 artifact를 저장하기 위한 bucket을 생성

In [5]:
# Cloud Storage bucket used as the SDK staging bucket and as the parent of the
# pipeline root; the bucket name embeds PROJECT_ID.
BUCKET_URI = f"gs://fs-practice-{PROJECT_ID}"  # @param {type:"string"}

In [6]:
! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}

Creating gs://fs-practice-inspired-micron-414202/...
ServiceException: 409 A Cloud Storage bucket named 'fs-practice-inspired-micron-414202' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


### - Service Account 설정
Service Account가 Bucket에 접근할 수 있도록 설정

In [7]:
shell_output = !gcloud auth list 2>/dev/null
SERVICE_ACCOUNT = shell_output[2].replace("*", "").strip()

In [8]:
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI

! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI

No changes made to gs://fs-practice-inspired-micron-414202/
No changes made to gs://fs-practice-inspired-micron-414202/


## 주요 Library Import (Kubeflow pipeline)


In [9]:
from typing import Any, Dict, List

import google.cloud.aiplatform as aip
import kfp
from kfp.v2 import compiler

import random
import string

  from kfp.v2 import compiler


Vertex AI Pipeline root 설정

In [10]:
# Cloud Storage prefix handed to the pipeline job as its pipeline root.
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline_root/automl_image_classification"

Initialize AI platform object

In [11]:
# Initialise the Vertex AI SDK once for the notebook. The location is passed
# explicitly so SDK calls (PipelineJob, Endpoint, Model) use REGION rather than
# whatever the SDK default happens to be.
aip.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)

## Google Cloud Pipeline Components을 사용하여 Custom Pipeline을 정의

이 파이프라인은 커스텀 학습 작업을 직접 정의하지 않고, google_cloud_pipeline_components가 제공하는 사전 빌드된 구성 요소만으로 구성됩니다.

구성 요소는 다음 순서로 연결됩니다: 이미지 데이터셋 생성(ImageDatasetCreateOp), AutoML 이미지 학습(AutoMLImageTrainingJobRunOp), 엔드포인트 생성(EndpointCreateOp), 학습된 모델을 엔드포인트에 배포(ModelDeployOp).



#### Pipeline을 정의

![pipeline 구성](practice_image/Clip7_image.png)

In [12]:
@kfp.dsl.pipeline(name="automl-flower-clf")
def pipeline(project: str = PROJECT_ID, region: str = REGION):
    """Train an AutoML image classifier on the flowers dataset and deploy it.

    Steps: create a Vertex AI image dataset from the public flowers CSV, run an
    AutoML image classification training job on it, create an endpoint, and
    deploy the trained model to that endpoint.

    Args:
        project: Google Cloud project ID the resources are created in.
        region: Vertex AI location used for every resource in the pipeline.
    """
    from google_cloud_pipeline_components.v1.automl.training_job import (
        AutoMLImageTrainingJobRunOp,
    )
    from google_cloud_pipeline_components.v1.dataset import ImageDatasetCreateOp
    from google_cloud_pipeline_components.v1.endpoint import (
        EndpointCreateOp,
        ModelDeployOp,
    )

    # `location=region` is passed to every component that accepts it so that the
    # dataset, training job and endpoint are all created in the same region;
    # otherwise only the endpoint would follow the `region` parameter.
    ds_op = ImageDatasetCreateOp(
        project=project,
        location=region,
        display_name="flowers_dataset",
        gcs_source="gs://cloud-samples-data/vision/automl_classification/flowers/all_data_v2.csv",
        import_schema_uri=aip.schema.dataset.ioformat.image.single_label_classification,
    )

    training_job_run_op = AutoMLImageTrainingJobRunOp(
        project=project,
        location=region,
        display_name="automl-flower-clf",
        prediction_type="classification",
        model_type="CLOUD",
        dataset=ds_op.outputs["dataset"],
        model_display_name="automl-flower-clf",
        # 70% / 15% / 15% train / validation / test split.
        training_fraction_split=0.7,
        validation_fraction_split=0.15,
        test_fraction_split=0.15,
        # Training budget expressed in milli node hours.
        budget_milli_node_hours=9000,
    )

    endpoint_op = EndpointCreateOp(
        project=project,
        location=region,
        display_name="automl-flower-clf",
    )

    # Deploy the trained model to the new endpoint with a fixed single replica.
    ModelDeployOp(
        model=training_job_run_op.outputs["model"],
        endpoint=endpoint_op.outputs["endpoint"],
        automatic_resources_min_replica_count=1,
        automatic_resources_max_replica_count=1,
    )

## Pipeline 실행

In [13]:
# Compile the pipeline function into a YAML spec file on local disk; the
# PipelineJob cell reads this file through `template_path`.
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(
    pipeline_func=pipeline,
    package_path="automl_image_classification_pipeline.yaml",
)

In [None]:
# Random 8-character lowercase/digit suffix so each run gets its own display name.
UUID = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
DISPLAY_NAME = f"flowers_automl_{UUID}"

# Build the job from the compiled spec; caching is disabled for this run.
job = aip.PipelineJob(
    display_name=DISPLAY_NAME,
    template_path="automl_image_classification_pipeline.yaml",
    pipeline_root=PIPELINE_ROOT,
    enable_caching=False,
)

# Submit the job and wait for it, printing state updates along the way.
job.run()

# Uncomment to remove the compiled spec once the job has been submitted:
# ! rm automl_image_classification_pipeline.yaml

Creating PipelineJob
PipelineJob created. Resource name: projects/651078960027/locations/us-central1/pipelineJobs/automl-flower-clf-20240215105538
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/651078960027/locations/us-central1/pipelineJobs/automl-flower-clf-20240215105538')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/automl-flower-clf-20240215105538?project=651078960027
PipelineJob projects/651078960027/locations/us-central1/pipelineJobs/automl-flower-clf-20240215105538 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/651078960027/locations/us-central1/pipelineJobs/automl-flower-clf-20240215105538 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/651078960027/locations/us-central1/pipelineJobs/automl-flower-clf-20240215105538 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/651078960027/locations/us-central1/pip

## SDK를 활용해서 Pipeline 정보 확인 및 컨트롤

In [None]:
def get_task_detail(task_details: List[Any], task_name: str) -> Any:
    """Return the first task detail whose ``task_name`` equals ``task_name``.

    Args:
        task_details: Task-detail objects that expose a ``task_name``
            attribute (they are accessed by attribute, not as dictionaries).
        task_name: Name of the pipeline task to look up, e.g. "endpoint-create".

    Returns:
        The matching task-detail object, or ``None`` when no task has that name.
    """
    return next(
        (detail for detail in task_details if detail.task_name == task_name),
        None,  # explicit "not found" result instead of falling off the end
    )

#### pipeline detail 확인

In [None]:
# Per-task details stored on the pipeline job resource; displayed as the cell output.
pipeline_task_details = job.gca_resource.job_detail.task_details
pipeline_task_details

#### endpoint 확인 

In [None]:
# Find the "endpoint-create" task, read the resource name from its first
# "endpoint" output artifact, and wrap it in an SDK Endpoint object.
endpoint_task = get_task_detail(pipeline_task_details, "endpoint-create")
endpoint_artifact = endpoint_task.outputs["endpoint"].artifacts[0]
endpoint_resourceName = endpoint_artifact.metadata["resourceName"]
endpoint = aip.Endpoint(endpoint_resourceName)
endpoint

#### endpoint undeploy와 삭제

In [None]:
# Undeploy every model from the endpoint first, then delete the endpoint itself.
endpoint.undeploy_all()
endpoint.delete()

#### pipeline이 생성한 모델 제거하기

In [None]:
# This pipeline has no "model-upload" task: the model is an output of the
# AutoML training step, whose task name follows the component name.
model_task = get_task_detail(pipeline_task_details, "automl-image-training-job")
if model_task is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise ValueError(
        "Task 'automl-image-training-job' not found; inspect pipeline_task_details "
        "for the available task names."
    )
model_resourceName = model_task.outputs["model"].artifacts[0].metadata["resourceName"]
model = aip.Model(model_resourceName)
model.delete()

#### Pipeline 제거하기

In [None]:
# Delete the pipeline job that was created and run earlier in this notebook.
job.delete()