In [3]:
%pip install "zenml[server]" --user
!zenml integration install sklearn -y
%pip install pyparsing==2.4.2 --user


Collecting zenml[server]
  Using cached zenml-0.55.2-py3-none-any.whl.metadata (17 kB)
Collecting alembic<1.9.0,>=1.8.1 (from zenml[server])
  Using cached alembic-1.8.1-py3-none-any.whl (209 kB)
Collecting azure-mgmt-resource>=21.0.0 (from zenml[server])
  Using cached azure_mgmt_resource-23.0.1-py3-none-any.whl.metadata (35 kB)
Collecting bcrypt==4.0.1 (from zenml[server])
  Using cached bcrypt-4.0.1-cp36-abi3-win_amd64.whl (152 kB)
Collecting click<8.1.4,>=8.0.1 (from zenml[server])
  Using cached click-8.1.3-py3-none-any.whl (96 kB)
Collecting click-params<0.4.0,>=0.3.0 (from zenml[server])
  Using cached click_params-0.3.0-py3-none-any.whl (12 kB)
Collecting cloudpickle<3,>=2.0.0 (from zenml[server])
  Using cached cloudpickle-2.2.1-py3-none-any.whl.metadata (6.9 kB)
Collecting distro<2.0.0,>=1.6.0 (from zenml[server])
  Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting docker<6.2.0,>=6.1.0 (from zenml[server])
  Using cached docker-6.1.3-py3-none-any.whl.met

In [4]:
!rm -rf .zen
!zenml init

'rm' is not recognized as an internal or external command,
operable program or batch file.


[1;35mNumExpr defaulting to 8 threads.[0m
Found existing ZenML repository at path 
'c:\Users\DELL-7373\Desktop\application\MLops\notebooks'.
⠋ Initializing ZenML repository at 
c:\Users\DELL-7373\Desktop\application\MLops\notebooks.

⠋ Initializing ZenML repository at 
c:\Users\DELL-7373\Desktop\application\MLops\notebooks.




### Example Experimentation ML Code

Let us start with a simple example of ML code. Below, we train a Scikit-learn SVC classifier to classify images of handwritten digits: we load the data, train a model on the training set, and then evaluate it on the test set.

In [5]:
import numpy as np
from sklearn.base import ClassifierMixin
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

def train_test() -> None:
    """Fit an SVC on the scikit-learn digits data and print its test accuracy.

    Every image is flattened into a single feature row. The split is not
    shuffled, so the last 20% of the samples (in dataset order) form the
    test set.
    """
    digits = load_digits()
    n_samples = len(digits.images)
    features = digits.images.reshape((n_samples, -1))

    X_train, X_test, y_train, y_test = train_test_split(
        features,
        digits.target,
        test_size=0.2,
        shuffle=False,
    )

    # fit() returns the estimator itself, so construction and training chain.
    classifier = SVC(gamma=0.001).fit(X_train, y_train)
    test_acc = classifier.score(X_test, y_test)
    print(f"Test accuracy: {test_acc}")

# Run the experiment once; the function prints the test accuracy and returns None.
train_test()
    

Test accuracy: 0.9583333333333334


### Turning experiments into ML pipelines with ZenML

In [6]:
from zenml import step
from typing_extensions import Annotated
import pandas as pd
from typing import Tuple

@step
def importer() -> Tuple[
    Annotated[np.ndarray, "X_train"],
    Annotated[np.ndarray, "X_test"],
    Annotated[np.ndarray, "y_train"],
    Annotated[np.ndarray, "y_test"],
]:
    """Load the digits dataset and split it into train and test arrays.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``. Images are flattened
        to one row per sample and the split is unshuffled with 20% of the
        samples held out for testing.
    """
    digits = load_digits()
    flat_images = digits.images.reshape((len(digits.images), -1))

    # train_test_split yields [X_train, X_test, y_train, y_test] in that order.
    splits = train_test_split(
        flat_images,
        digits.target,
        test_size=0.2,
        shuffle=False,
    )
    return tuple(splits)

@step
def svc_trainer(X_train: np.ndarray, y_train: np.ndarray) -> ClassifierMixin:
    """Fit an sklearn SVC (gamma=0.001) on the training data and return it."""
    # fit() returns the estimator itself, so the fitted model can be returned directly.
    return SVC(gamma=0.001).fit(X_train, y_train)

@step
def evaluator(
    X_test: np.ndarray,
    y_test: np.ndarray,
    model: ClassifierMixin,
) -> float:
    """Score a fitted sklearn classifier on the test split.

    The accuracy is printed and also returned so it is available as the
    step's output.
    """
    test_acc = model.score(X=X_test, y=y_test)
    print(f"Test accuracy: {test_acc}")
    return test_acc

[1;35mNumExpr defaulting to 8 threads.[0m


Similarly, we can use ZenML's `@pipeline` decorator to connect all our steps into an ML pipeline.

Note that the pipeline definition does not depend on the concrete step functions we defined above; it merely establishes a recipe for how data moves through the steps. This means we can replace steps as we wish, e.g. to run the same pipeline with different models and compare their performance.

In [7]:
from zenml import pipeline

@pipeline
def digits_pipeline():
    """Wire the importer, trainer and evaluator steps into one pipeline.

    The importer's four outputs feed the downstream steps: the training
    arrays go to the trainer, and the test arrays plus the trained model
    go to the evaluator.
    """
    train_features, test_features, train_labels, test_labels = importer()
    classifier = svc_trainer(X_train=train_features, y_train=train_labels)
    evaluator(X_test=test_features, y_test=test_labels, model=classifier)

### Running ZenML Pipeline

In [8]:
# Calling the decorated pipeline function starts a pipeline run
# (see the "Initiating a new run" log output below).
digits_svc_pipeline = digits_pipeline()

[1;35mInitiating a new run for the pipeline: [0m[1;36mdigits_pipeline[1;35m.[0m


[1;35mCreating database tables[0m
[1;35mCreating default workspace 'default' ...[0m
[1;35mCreating default stack in workspace default...[0m
[1;35mCreating default user 'default' ...[0m
[1;35mSetting the global active workspace to 'default'.[0m
[33mSetting the global active stack to default.[0m
[33mThe current repo active workspace is no longer available. Resetting the active workspace to 'default'.[0m
[33mThe current repo active stack is no longer available. Resetting the active stack to default.[0m
[1;35mReloading configuration file C:\Users\DELL-7373\Desktop\application\MLops\notebooks\.zen\config.yaml[0m
[1;35mRegistered new version: [0m[1;36m(version 1)[1;35m.[0m
[1;35mExecuting a new run.[0m
[1;35mUsing user: [0m[1;36mdefault[1;35m[0m
[1;35mUsing stack: [0m[1;36mdefault[1;35m[0m
[1;35m  artifact_store: [0m[1;36mdefault[1;35m[0m
[1;35m  orchestrator: [0m[1;36mdefault[1;35m[0m
[1;35mStep [0m[1;36mimporter[1;35m has started.[0m
[1;35

In [10]:
from zenml.environment import Environment

def start_zenml_dashboard(port=8237):
    if Environment.in_google_colab():
        from pyngrok import ngrok

        public_url = ngrok.connect(port)
        print(f"ZenML dashboard is available at {public_url}")
        !zenml up --blocking --port {port}

    else:
        !zenml up --blocking --port {port}

# NOTE(review): `zenml up --blocking` appears to keep this cell running until
# the kernel is interrupted (see the `^C` in the output) - run it last.
start_zenml_dashboard()

^C
