In [1]:
import kfp as kfp
import kfp.compiler as cpl
from kfp import dsl
from kfp.v2.dsl import component, Input, Output, OutputPath, Dataset, Model, ClassificationMetrics
import datetime

In [2]:
# Packages passed as `packages_to_install` to every component below.
PYTHON_PACKAGES = [
    'scikit-learn',
]
# Container image passed as `base_image` to every component below.
BASE_IMAGE = 'python:3.9'

# Experiment under which the runs are submitted.
EXPERIMENT_NAME = 'VISUALIZACOES'
# Prefix of each run name; the submission timestamp is appended to it.
RUN_NAME = 'RUN_'

In [3]:
@component(
    packages_to_install=PYTHON_PACKAGES,
    base_image=BASE_IMAGE,
    output_component_file='iris_confusion_matrix.yaml'
)
def iris_sgdclassifier(test_samples_fraction: float, metrics: Output[ClassificationMetrics]):
    """Log a cross-validated confusion matrix for an SGD classifier on Iris.

    The Iris data is split with ``train_test_split``; only the training part
    is used. Predictions are obtained by 3-fold cross-validation on that
    training part and compared against the true training labels.

    Args:
        test_samples_fraction: Fraction of the samples set aside by the split
            (passed as ``test_size``).
        metrics: Output artifact that receives the confusion matrix.
    """
    from sklearn import datasets, model_selection
    from sklearn.linear_model import SGDClassifier
    from sklearn.metrics import confusion_matrix

    class_names = ['Setosa', 'Versicolour', 'Virginica']

    iris_dataset = datasets.load_iris()
    # The held-out part is not evaluated in this component, so it is discarded.
    train_x, _, train_y, _ = model_selection.train_test_split(
        iris_dataset['data'], iris_dataset['target'], test_size=test_samples_fraction)

    # cross_val_predict fits its own clones of the estimator for every fold,
    # so fitting the classifier beforehand would be wasted work.
    classifier = SGDClassifier()
    predictions = model_selection.cross_val_predict(classifier, train_x, train_y, cv=3)

    # Pin the label set so the matrix is always len(class_names) square and
    # lines up with class_names, even when a class is missing from the
    # training split (e.g. for a very large test_samples_fraction).
    matrix = confusion_matrix(
        train_y, predictions, labels=list(range(len(class_names))))
    # .tolist() converts the numpy array into a plain nested list.
    metrics.log_confusion_matrix(class_names, matrix.tolist())

In [4]:
@component(
    packages_to_install=PYTHON_PACKAGES,
    base_image=BASE_IMAGE,
    output_component_file='wine_roc_curve.yaml'
)
def wine_classification(n_estimators: int, metrics: Output[ClassificationMetrics]):
    """Log a ROC curve for a random forest on a binarized Wine dataset.

    The Wine labels are reduced to "is class 1" vs. the rest. Class
    probabilities are obtained by 3-fold cross-validation on the training
    part of the split, and the resulting ROC curve is logged.

    Args:
        n_estimators: Number of trees in the random forest.
        metrics: Output artifact that receives the ROC curve points.
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import roc_curve
    from sklearn.datasets import load_wine
    from sklearn.model_selection import train_test_split, cross_val_predict

    X, y = load_wine(return_X_y=True)
    # Binary classification problem for label 1.
    y = y == 1

    # The held-out part is not evaluated in this component, so it is discarded.
    X_train, _, y_train, _ = train_test_split(X, y, random_state=42)

    # Use the caller-supplied forest size; it was previously ignored in favour
    # of a hard-coded value of 10.
    rfc = RandomForestClassifier(n_estimators=n_estimators, random_state=42)

    # cross_val_predict fits its own clones of the estimator for every fold,
    # so no separate fit is needed. Only the probabilities are required for
    # the ROC curve; hard predictions are not computed.
    y_scores = cross_val_predict(
        rfc, X_train, y_train, cv=3, method='predict_proba')

    # Column 1 of predict_proba holds the score of the positive (True) class.
    fpr, tpr, thresholds = roc_curve(
        y_true=y_train, y_score=y_scores[:, 1], pos_label=True)
    # NOTE(review): recent scikit-learn releases may return an infinite first
    # threshold; confirm the metrics backend accepts that value, since the
    # scikit-learn version is not pinned.
    metrics.log_roc_curve(fpr, tpr, thresholds)

In [5]:
# Pipeline with two steps that do not consume each other's outputs:
# the Iris confusion-matrix component and the Wine ROC-curve component.
# Each pipeline parameter is forwarded to the component that uses it.
@dsl.pipeline(
    name='metrics-visualization-pipeline'
)
def metrics_visualization_pipeline(n_estimators: int, test_samples_fraction: float):
    confusion_matrix_task = iris_sgdclassifier(
        test_samples_fraction=test_samples_fraction,
    )
    roc_curve_task = wine_classification(
        n_estimators=n_estimators,
    )

In [6]:
# Address handed to the Kubeflow Pipelines client as its host.
KFP_ENDPOINT = 'http://localhost:8080/pipeline'
client = kfp.Client(host=KFP_ENDPOINT)

In [7]:
# Pipeline arguments for this run.
args = dict(n_estimators=58, test_samples_fraction=0.2)

# Submit a run straight from the pipeline function. The current timestamp is
# appended to the run-name prefix.
client.create_run_from_pipeline_func(
    metrics_visualization_pipeline,
    mode=kfp.dsl.PipelineExecutionMode.V2_COMPATIBLE,
    arguments=args,
    experiment_name=EXPERIMENT_NAME,
    run_name=f'{RUN_NAME}{datetime.datetime.now()}',
)



RunPipelineResult(run_id=110c856f-c82d-450b-97d7-e3debe2e5a2f)

## Compilando o pipeline de modo compatível com SDK V2 e executando

In [8]:
# Compile the pipeline function in V2-compatible mode and write the result
# to a YAML package file.
cpl.Compiler(
    mode=kfp.dsl.PipelineExecutionMode.V2_COMPATIBLE,
).compile(
    pipeline_func=metrics_visualization_pipeline,
    package_path='pipeline_iris_e_wine.yaml',
)

In [9]:
# Pipeline arguments for this run.
args = dict(n_estimators=22, test_samples_fraction=0.08)

# Submit a run from the pipeline package file. The current timestamp is
# appended to the run-name prefix.
client.create_run_from_pipeline_package(
    pipeline_file='pipeline_iris_e_wine.yaml',
    arguments=args,
    experiment_name=EXPERIMENT_NAME,
    run_name=f'{RUN_NAME}{datetime.datetime.now()}',
)

RunPipelineResult(run_id=16c69539-68d2-4b09-a907-bcc3887aaa38)