In [3]:
!pip show kfp

Name: kfp
Version: 2.7.0
Summary: Kubeflow Pipelines SDK
Home-page: https://github.com/kubeflow/pipelines
Author: The Kubeflow Authors
Author-email: 
License: 
Location: /Applications/anaconda3/lib/python3.11/site-packages
Requires: click, docstring-parser, google-api-core, google-auth, google-cloud-storage, kfp-pipeline-spec, kfp-server-api, kubernetes, protobuf, PyYAML, requests-toolbelt, tabulate, urllib3
Required-by: 


In [9]:
import kfp
import kfp.components as comp
import requests
import kfp.dsl as dsl

In [3]:
@dsl.component()
def prepare_data():
    import pandas as pd
    df = pd.read_csv('iris.csv')
    df = df.dropna()
    df.to_csv(f'data/final_df.csv', index=False)
    print("\n ---- data csv is saved to PV location /data/final_df.csv ----")

In [4]:
def train_test_split():
    import pandas as pd
    from sklearn.model_selection import train_test_split
    import numpy as np
    print("---- Inside train_test_split component ----")
    final_data = pd.read_csv(f'data/final_df.csv')
    X = final_data.drop(columns='class')
    y = final_data['class']
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=47)
    np.save(f'data/X_train.npy', X_train)
    np.save(f'data/X_train.npy', X_test)
    np.save(f'data/X_train.npy', y_train)
    np.save(f'data/X_train.npy', y_test)
    print("\n---- X_train ----")
    print("\n")
    print(X_train)
    
    print("\n---- X_test ----")
    print("\n")
    print(X_test)
    
    print("\n---- y_train ----")
    print("\n")
    print(y_train)
    
    print("\n---- y_test ----")
    print("\n")
    print(y_test)

In [5]:
def train_basic_classifier():
    from sklearn.linear_model import LogisticRegression
    import numpy as np
    import pandas as pd

    print("---- Inside training_basic_classifier component ----")

    X_train = np.load(f'data/X_train.npy')
    y_train = np.load(f'data/y_train.npy')

    classifier = LogisticRegression(max_iter=500)
    classifier.fit(X_train,y_train)

    import pickle
    with open(f'data/model.pkl','wb') as f:
        pickle.dump(classifier,f)

    print("\n logistic regression classifier is trained on iris data and saved to PV location /data/model.pkl ----")

In [6]:
def predict_on_test_data():
    import pandas as pd
    import numpy as np
    import pickle
    print("---- Inside predict_on_test_data component ----")
    with open(f'data/model.pkl','rb') as f:
        logistic_reg_model = pickle.load(f)

    X_test = np.load(f'data/X_test')
    y_pred = logistic_reg_model.predict(X_test)
    np.save(f'data/y_pred.npy', y_pred)

    print("\n---- Predicted classes ----")
    print("\n")
    print(y_pred)

In [7]:
def predict_prob_on_test_data():
    import pandas as pd
    import numpy as np
    import pickle
    print("---- Inside predict_prob_on_test_data component ----")
    with open(f'data/model.pkl','rb') as f:
        logistic_reg_model = pickle.load(f)
    X_test = np.load(f'data/X_test.npy',allow_pickle=True)
    y_pred_prob = logistic_reg_model.predict_proba(X_test)
    np.save(f'data/y_pred_prob.npy', y_pred_prob)
    
    print("\n---- Predicted Probabilities ----")
    print("\n")
    print(y_pred_prob)

In [20]:
def get_metrics():
    import pandas as pd
    import numpy as np
    from sklearn.metrics import accuracy_score,precision_score,recall_score,log_loss,classification_report
    print("---- Inside get_metrics component ----")
    y_test = np.load(f'data/y_test.npy')
    y_pred = np.load(f'data/y_pred.npy')
    y_pred_prob = np.load(f'data/y_pred_prob.npy')
    acc = accuracy_score(y_pred,y_test)
    prec = precision_score(y_pred,y_test)
    recall = recall_score(y_pred,y_test)
    entropy = log_loss(y_test, y_pred_prob)
    print(classification_report(y_pred,y_test))
    print({'Accuracy':round(acc,2),'precision': round(prec, 2), 'recall': round(recall, 2), 'entropy': round(entropy, 2)})

# Kubeflow pipeline creation work start from here

In [1]:
pip install --upgrade pip

Note: you may need to restart the kernel to use updated packages.


In [1]:
pip install kfp

Collecting kfp
  Using cached kfp-2.7.0.tar.gz (441 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting docstring-parser<1,>=0.7.3 (from kfp)
  Using cached docstring_parser-0.16-py3-none-any.whl.metadata (3.0 kB)
Collecting google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5 (from kfp)
  Using cached google_api_core-2.19.0-py3-none-any.whl.metadata (2.7 kB)
Collecting google-auth<3,>=1.6.1 (from kfp)
  Using cached google_auth-2.30.0-py2.py3-none-any.whl.metadata (4.7 kB)
Collecting google-cloud-storage<3,>=2.2.1 (from kfp)
  Using cached google_cloud_storage-2.17.0-py2.py3-none-any.whl.metadata (6.6 kB)
Collecting kfp-pipeline-spec==0.3.0 (from kfp)
  Using cached kfp_pipeline_spec-0.3.0-py3-none-any.whl.metadata (329 bytes)
Collecting kfp-server-api<2.1.0,>=2.0.0 (from kfp)
  Using cached kfp-server-api-2.0.5.tar.gz (63 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting kubernetes<27,>=8.0.0 (from kfp)
  Using cached kubernetes-26.1.0-py2.py3

In [5]:
pip install jupyter pandas

Collecting jupyter
  Downloading jupyter-1.0.0-py2.py3-none-any.whl.metadata (995 bytes)
Collecting notebook (from jupyter)
  Downloading notebook-7.2.1-py3-none-any.whl.metadata (10 kB)
Collecting qtconsole (from jupyter)
  Downloading qtconsole-5.5.2-py3-none-any.whl.metadata (5.1 kB)
Collecting jupyter-console (from jupyter)
  Downloading jupyter_console-6.6.3-py3-none-any.whl.metadata (5.8 kB)
Collecting nbconvert (from jupyter)
  Downloading nbconvert-7.16.4-py3-none-any.whl.metadata (8.5 kB)
Collecting ipywidgets (from jupyter)
  Downloading ipywidgets-8.1.3-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.11 (from ipywidgets->jupyter)
  Downloading widgetsnbextension-4.0.11-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab-widgets~=3.0.11 (from ipywidgets->jupyter)
  Downloading jupyterlab_widgets-3.0.11-py3-none-any.whl.metadata (4.1 kB)
Collecting beautifulsoup4 (from nbconvert->jupyter)
  Downloading beautifulsoup4-4.12.3-py3-none-any.whl.metadata 

In [11]:
import kfp
import kfp.components as comp
import requests
import kfp.dsl as dsl

In [13]:
create_step_prepare_data = comp.(
    func=prepare_data,
    base_image='python:3.7',
    packages_to_install=['pandas==1.2.4','numpy==1.21.0']
)

AttributeError: module 'kfp.components' has no attribute 'create_step_prepare_data'

In [35]:
!pip install kfp --upgrade

