Install ZenML and its scikit-learn integration, then restart the notebook kernel so that the newly installed packages are picked up.

In [1]:
# Install ZenML together with its "server" extra into the kernel's environment.
%pip install "zenml[server]"
# Install the sklearn integration for ZenML.
!zenml integration install sklearn -y
# Remove pyparsing.
# NOTE(review): the recorded output of this cell shows pyparsing 2.4.7 being
# uninstalled although 2.4.2 is named here — confirm the intended version.
%pip uninstall pyparsing==2.4.2 -y

import IPython

# Restart the kernel automatically; pip's own output notes that a restart may
# be needed before updated packages can be used.
IPython.Application.instance().kernel.do_shutdown(restart=True)


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[2K[32m⠼[0m Installing integrations.....
[1A[2KFound existing installation: pyparsing 2.4.7
Uninstalling pyparsing-2.4.7:
  Successfully uninstalled pyparsing-2.4.7
Note: you may need to restart the kernel to use updated packages.


{'status': 'ok', 'restart': True}

: 

# ZenML Setup

In [1]:
# Start with a fresh ML stack: delete any existing .zen directory in the
# current working directory, then initialise a new ZenML repository there.
!rm -rf .zen
!zenml init

[?25l[1;35mInitializing the ZenML global configuration version to 0.61.0[0m
[1;35mCreating database tables[0m
[32m⠋[0m Initializing ZenML repository at /Users/chooenming/Desktop/MLOps.
[2K[1A[2K[32m⠙[0m Initializing ZenML repository at /Users/chooenming/Desktop/MLOps.
[1;35mCreating default workspace 'default' ...[0m
[1;35mCreating default stack in workspace default...[0m
[2K[1A[2K[32m⠹[0m Initializing ZenML repository at /Users/chooenming/Desktop/MLOps.
[2K[1A[2K[32m⠸[0m Initializing ZenML repository at /Users/chooenming/Desktop/MLOps.
[2K[1A[2K[32m⠼[0m Initializing ZenML repository at /Users/chooenming/Desktop/MLOps.
[2K[1A[2K[32m⠴[0m Initializing ZenML repository at /Users/chooenming/Desktop/MLOps.
[1;35mSetting the global active workspace to 'default'.[0m
[33mSetting the global active stack to default.[0m
[1;35mSetting the repo active workspace to 'default'.[0m
[33mSetting the repo active stack to default.[0m
[2K[1A[2K[32m⠦[0m Initia

# Example Experimentation ML Code

In [2]:
import numpy as np
from sklearn.base import ClassifierMixin
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

In [3]:
def train_test() -> None:
    """Fit an SVC on the scikit-learn digits data and print its test accuracy.

    Every image is flattened into one feature row. The data is split without
    shuffling, with 20% of the samples held out for testing.
    """
    dataset = load_digits()
    n_samples = len(dataset.images)
    features = dataset.images.reshape((n_samples, -1))

    # shuffle=False keeps the split identical from run to run.
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        dataset.target,
        test_size=0.2,
        shuffle=False,
    )

    # SVC.fit returns the estimator itself, so construction and fitting chain.
    classifier = SVC(gamma=0.001).fit(X_train, y_train)
    test_acc = classifier.score(X_test, y_test)
    print(f"Test accuracy: {test_acc}")

In [4]:
# Run the plain (non-pipeline) experiment once; it prints the test accuracy.
train_test()

Test accuracy: 0.9583333333333334


# Turning experiments into ML pipelines with ZenML

In [5]:
from zenml import step
from typing_extensions import Annotated
import pandas as pd
from typing import Tuple

In [6]:
# `importer` takes no arguments. Its return annotation names each of the four
# outputs (X_train, X_test, y_train, y_test) because the following steps
# consume them.
@step
def importer() -> Tuple[
    Annotated[np.ndarray, "X_train"],
    Annotated[np.ndarray, "X_test"],
    Annotated[np.ndarray, "y_train"],
    Annotated[np.ndarray, "y_test"],
]:
    """Load the digits dataset and split it into train/test numpy arrays.

    Each image is flattened into one feature row; 20% of the samples are held
    out as the test set, without shuffling.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    dataset = load_digits()
    flat_images = dataset.images.reshape((len(dataset.images), -1))
    split = train_test_split(
        flat_images, dataset.target, test_size=0.2, shuffle=False
    )

    # Unpack so that exactly four arrays are returned as a tuple.
    X_train, X_test, y_train, y_test = split
    return X_train, X_test, y_train, y_test


# `svc_trainer` receives the training features and labels as numpy arrays and
# returns the fitted classifier.
@step
def svc_trainer(
    X_train: np.ndarray,
    y_train: np.ndarray,
) -> ClassifierMixin:
    """Fit a scikit-learn SVC (gamma=0.001) on the given training data.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted SVC model.
    """
    # SVC.fit returns the estimator itself, so the fitted model is returned directly.
    return SVC(gamma=0.001).fit(X_train, y_train)


@step
def evaluator(
    X_test: np.ndarray,
    y_test: np.ndarray,
    model: ClassifierMixin,
) -> float:
    """Score a fitted sklearn model on the test split.

    Args:
        X_test: Test features.
        y_test: Test labels.
        model: The fitted classifier to evaluate.

    Returns:
        The value of ``model.score(X_test, y_test)``, which is also printed.
    """
    test_acc = model.score(X_test, y_test)
    print(f"Test accuracy: {test_acc}")
    return test_acc

Use the `@pipeline` decorator to connect all steps into an ML pipeline.

<br>The pipeline is a recipe for how data moves through the steps. Because the steps are simply called inside the pipeline function, we can replace a step as we wish, e.g. swap in a different trainer step to run the same pipeline with different models and compare their performance.

In [7]:
from zenml import pipeline

@pipeline
def digits_pipeline():
    """Wire the importer, trainer and evaluator steps into one pipeline.

    The importer's training outputs feed the trainer; its test outputs,
    together with the trained model, feed the evaluator.
    """
    train_features, test_features, train_labels, test_labels = importer()
    trained_model = svc_trainer(X_train=train_features, y_train=train_labels)
    evaluator(X_test=test_features, y_test=test_labels, model=trained_model)

# Running ZenML Pipelines
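The pipeline is already wired to concrete step functions, so calling the pipeline function starts a run: the log output of the next cell shows the run being initiated and the steps executing. The explicit `run()` call is left commented out in that cell.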

In [8]:
# Calling the decorated pipeline function starts a run right away; the log
# output of this cell begins with "Initiating a new run for the pipeline".
digits_svc_pipeline = digits_pipeline()
# Disabled: the run recorded in this cell's output was produced without this
# explicit `.run()` call.
#digits_svc_pipeline.run(unlisted=True)

[1;35mInitiating a new run for the pipeline: [0m[1;36mdigits_pipeline[1;35m.[0m
[1;35mRegistered new version: [0m[1;36m(version 1)[1;35m.[0m
[1;35mExecuting a new run.[0m
[1;35mUsing user: [0m[1;36mdefault[1;35m[0m
[1;35mUsing stack: [0m[1;36mdefault[1;35m[0m
[1;35m  artifact_store: [0m[1;36mdefault[1;35m[0m
[1;35m  orchestrator: [0m[1;36mdefault[1;35m[0m
[1;35mYou can visualize your pipeline runs in the [0m[1;36mZenML Dashboard[1;35m. In order to try it locally, please run [0m[1;36mzenml up[1;35m.[0m
[1;35mStep [0m[1;36mimporter[1;35m has started.[0m
[1;35mSkipping visualization of numpy array because matplotlib is not installed. To install matplotlib, run [0m[1;36mpip install matplotlib[1;35m.[0m
[1;35mSkipping visualization of numpy array because matplotlib is not installed. To install matplotlib, run [0m[1;36mpip install matplotlib[1;35m.[0m
[1;35mStep [0m[1;36mimporter[1;35m has finished in [0m[1;36m0.530s[1;35m.[0m
[

Visualise the pipeline run in the ZenML dashboard.
<br>Log in with the username `default` and an empty password, then navigate to the "Runs" tab in the "Pipelines" section.

In [None]:
import os
from zenml.environment import Environment

def start_zenml_dashboard(port=8237):
    """Start the ZenML dashboard server on ``port`` via ``zenml up``.

    In Google Colab an ngrok tunnel to ``port`` is opened first and the server
    is started with ``--blocking``; in every other environment ``zenml up`` is
    started without that flag.

    The command is run as an argument list (no shell), and a missing
    executable or a non-zero exit status is reported instead of being
    silently ignored.

    Args:
        port: Port for the dashboard; must be convertible to ``int``.

    Raises:
        ValueError: If ``port`` is a string that is not an integer.
        TypeError: If ``port`` cannot be converted to ``int`` at all.
    """
    import subprocess

    # Coerce up front so nothing but an integer can reach the command line.
    port = int(port)

    # NOTE(review): presumably works around fork-safety aborts on macOS — confirm.
    os.environ['OBJC_DISABLE_INITIALIZE_FORK_SAFETY'] = 'YES'

    command = ["zenml", "up"]

    if Environment.in_google_colab():
        from pyngrok import ngrok

        public_url = ngrok.connect(port)
        print(f"Ngrok tunnel available at: {public_url}")
        command.append("--blocking")

    command += ["--port", str(port)]

    try:
        # The child process inherits os.environ, including the variable set above.
        completed = subprocess.run(command, check=False)
    except FileNotFoundError:
        # Best-effort like the original: report the problem instead of raising.
        print("Could not find the `zenml` executable on PATH; is ZenML installed?")
        return

    if completed.returncode != 0:
        print(f"`{' '.join(command)}` exited with status {completed.returncode}")

# Start the dashboard on the default port (8237) when this code runs as the
# main module.
if __name__ == "__main__":
    start_zenml_dashboard()
