In [1]:
# NOTE(review): the setup commands below are wrapped in a string literal, so
# running this cell installs nothing - it only echoes the text back as the
# cell's value. Later cells import `zenml`, so these commands presumably have
# to be run in the kernel's environment beforehand - TODO confirm the intended
# setup flow.
"""%pip install "zenml[server]"
!zenml integration install sklearn -y
%pip install pyparsing==2.4.2"""


'%pip install "zenml[server]"\n!zenml integration install sklearn -y\n%pip install pyparsing==2.4.2'

In [2]:
!rm -rf .zen
!zenml init

'rm' is not recognized as an internal or external command,
operable program or batch file.
'zenml' is not recognized as an internal or external command,
operable program or batch file.


### Example Experimentation ML Code

Let us get started with some simple exemplary ML code. In the following, we train a Scikit-learn SVC classifier to classify images of handwritten digits. We load the data, train a model on the training set, then test it on the test set.

In [3]:
import numpy as np
from sklearn.base import ClassifierMixin
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

def train_test() -> None:
    """Fit an SVC on the scikit-learn digits data and print its test accuracy.

    The images are flattened to one row per sample, split 80/20 without
    shuffling, and the classifier is scored on the held-out 20%.
    """
    digits = load_digits()

    # One row per image: collapse all remaining dimensions into features.
    n_samples = len(digits.images)
    features = digits.images.reshape((n_samples, -1))

    # shuffle=False keeps the dataset order, so the split is deterministic.
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, digits.target, test_size=0.2, shuffle=False
    )

    classifier = SVC(gamma=0.001)
    classifier.fit(train_features, train_labels)

    test_acc = classifier.score(test_features, test_labels)
    print(f"Test accuracy: {test_acc}")


train_test()
    

Test accuracy: 0.9583333333333334


### Turning experiments into ML pipelines with ZenML

In [4]:
from zenml import step
from typing_extensions import Annotated
import pandas as pd
from typing import Tuple

@step
def importer() -> Tuple[
    Annotated[np.ndarray, "X_train"],
    Annotated[np.ndarray, "X_test"],
    Annotated[np.ndarray, "y_train"],
    Annotated[np.ndarray, "y_test"],
]:
    """Load the digits dataset and split it into train and test arrays.

    Returns:
        A 4-tuple ``(X_train, X_test, y_train, y_test)``: flattened image
        features and their targets, split 80/20 without shuffling.
    """
    digits = load_digits()

    # One row per image: collapse all remaining dimensions into features.
    n_samples = len(digits.images)
    features = digits.images.reshape((n_samples, -1))

    # shuffle=False keeps the dataset order, so the split is deterministic.
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, digits.target, test_size=0.2, shuffle=False
    )
    return train_features, test_features, train_labels, test_labels

@step
def svc_trainer(
    X_train: np.ndarray,
    y_train: np.ndarray,
) -> ClassifierMixin:
    """Fit a scikit-learn SVC (gamma=0.001) on the given training data.

    Args:
        X_train: Training feature array.
        y_train: Training target array.

    Returns:
        The fitted classifier.
    """
    classifier = SVC(gamma=0.001)
    classifier.fit(X_train, y_train)
    return classifier

@step
def evaluator(
    X_test: np.ndarray,
    y_test: np.ndarray,
    model: ClassifierMixin,
) -> float:
    """Score a fitted classifier on the test set, print and return the result.

    Args:
        X_test: Test feature array.
        y_test: Test target array.
        model: Fitted classifier exposing ``score``.

    Returns:
        The value of ``model.score(X_test, y_test)``.
    """
    test_acc = model.score(X_test, y_test)
    # Echo the metric so it shows up in the step's output as well.
    print(f"Test accuracy: {test_acc}")
    return test_acc

ModuleNotFoundError: No module named 'zenml'

Similarly, we can use ZenML's `@pipeline` decorator to connect all our steps into an ML pipeline.

Note that the pipeline definition does not depend on the concrete step functions we defined above; it merely establishes a recipe for how data moves through the steps. This means we can replace steps as we wish, e.g. to run the same pipeline with different models to compare their performances.

In [None]:
from zenml import pipeline

@pipeline
def digits_pipeline():
    """Wire the steps together: importer -> svc_trainer -> evaluator."""
    # The importer's four outputs feed the two downstream steps.
    train_features, test_features, train_labels, test_labels = importer()

    # Train on the training split only.
    fitted_model = svc_trainer(X_train=train_features, y_train=train_labels)

    # Evaluate the trained model on the held-out split.
    evaluator(X_test=test_features, y_test=test_labels, model=fitted_model)

### Running ZenML Pipeline

In [None]:
# Calling the decorated pipeline function starts a pipeline run (the log output
# of this cell reports "Initiating a new run for the pipeline: digits_pipeline").
# NOTE(review): what the call returns depends on the ZenML version - confirm
# before relying on `digits_svc_pipeline` downstream.
digits_svc_pipeline = digits_pipeline()

[1;35mInitiating a new run for the pipeline: [0m[1;36mdigits_pipeline[1;35m.[0m
[1;35mReusing registered pipeline version: [0m[1;36m(version: 2)[1;35m.[0m
[1;35mExecuting a new run.[0m
[1;35mUsing user: [0m[1;36mdefault[1;35m[0m
[1;35mUsing stack: [0m[1;36mdefault[1;35m[0m
[1;35m  artifact_store: [0m[1;36mdefault[1;35m[0m
[1;35m  orchestrator: [0m[1;36mdefault[1;35m[0m
[1;35mUsing cached version of [0m[1;36mimporter[1;35m.[0m
[1;35mStep [0m[1;36mimporter[1;35m has started.[0m
[1;35mUsing cached version of [0m[1;36msvc_trainer[1;35m.[0m
[1;35mStep [0m[1;36msvc_trainer[1;35m has started.[0m
[1;35mUsing cached version of [0m[1;36mevaluator[1;35m.[0m
[1;35mStep [0m[1;36mevaluator[1;35m has started.[0m
[1;35mPipeline run has finished in [0m[1;36m1.483s[1;35m.[0m
[1;35mDashboard URL: http://127.0.0.1:8237/workspaces/default/pipelines/ae9b510c-1633-43e6-9595-c9800de983e3/runs/af6847d7-006f-4718-951a-18a982ac2ff8/dag[0m


In [None]:
from zenml.environment import Environment

def start_zenml_dashboard(port=8237):
    """Start the ZenML server/dashboard with `zenml up` on the given port.

    When `Environment.in_google_colab()` is true, an ngrok connection is
    opened for the port and its URL is printed before `zenml up` is run with
    `--blocking`. Otherwise `zenml up` is run without `--blocking`.

    Args:
        port: Port passed to `zenml up --port` (and to `ngrok.connect` in the
            Colab branch). Defaults to 8237.
    """
    if Environment.in_google_colab():
        # Imported lazily: pyngrok is only needed in the Colab branch.
        from pyngrok import ngrok

        # Open the connection and print its URL first, because the blocking
        # `zenml up` call below does not return while the server is running.
        # NOTE(review): presumably this exposes the local port publicly so the
        # dashboard is reachable from outside Colab - confirm.
        public_url = ngrok.connect(port)
        print(f"ZenML dashboard is available at {public_url}")
        !zenml up --blocking --port {port}

    else:
        # `{port}` is interpolated into the shell command by IPython.
        !zenml up --port {port}

start_zenml_dashboard()

[1;35mThe local ZenML server is already configured with the same parameters.[0m
[?25l[32m⠋[0m Starting service 'LocalZenServer[44c42d26-d92c-48d8-9a3b-3ea362e3f65c] (type: 
zen_server, flavor: local)'.

[1A[2K[1A[2K[1A[2K[1;35mConnecting ZenML to the 'local' local ZenML server (http://127.0.0.1:8237).[0m
[1;35mUpdated the global store configuration.[0m
[1;35mConnected ZenML to the 'local' local ZenML server (http://127.0.0.1:8237).[0m
[2;36mThe local ZenML dashboard is available at [0m[2;32m'http://127.0.0.1:8237'[0m[2;36m. You can [0m
[2;36mconnect to it using the [0m[2;32m'default'[0m[2;36m username and an empty password. [0m
[1;35mAutomatically opening the dashboard in your browser. To disable this, set the env variable AUTO_OPEN_DASHBOARD=false.[0m
