## Scikit-Learn SVM PCA Pipeline Testing

The following example will:

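* Upload and convert a scikit-learn SVM PCA pipeline model.
* Deploy the model in a pipeline.
* Run a sample inference, then undeploy the pipeline.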

In [1]:
import json
import os
import pickle

import wallaroo
from wallaroo.pipeline   import Pipeline
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework

import pyarrow as pa
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Create the Wallaroo client that every later cell uses as `wl`.
wl = wallaroo.Client(
    auth_type="sso",
    interactive=True,
)

In [2]:
def get_workspace(name):
    """Return the workspace called ``name``, creating it when none is listed.

    Args:
        name: Workspace name to look for in ``wl.list_workspaces()``.

    Returns:
        The first listed workspace whose ``name()`` equals ``name``;
        otherwise the workspace returned by ``wl.create_workspace(name)``.
    """
    for ws in wl.list_workspaces():
        if ws.name() == name:
            # Return as soon as a match is found; this also avoids comparing
            # the workspace object against None with `==`.
            return ws
    # Nothing in the listing matched, so create the workspace.
    return wl.create_workspace(name)

In [3]:
# Fetch this example's workspace (get_workspace creates it if it is missing)
# and make it the client's current workspace.
workspace = get_workspace(
    "sklearn-svm-pca-workspace"
)
wl.set_current_workspace(workspace)

{'name': 'sklearn-svm-pca-workspace', 'id': 87, 'archived': False, 'created_by': 'd9a72bd9-2a1c-44dd-989f-3c7c15130885', 'created_at': '2023-07-05T15:09:56.931033+00:00', 'models': [], 'pipelines': []}

## Data & Model Creation

In [4]:
# Input: a single "inputs" column holding a fixed-size list of 4 float64 values.
input_schema = pa.schema([("inputs", pa.list_(pa.float64(), list_size=4))])

# Output: a single int32 column named "predictions".
output_schema = pa.schema([("predictions", pa.int32())])

## Upload model

In [5]:
# Upload the .pkl model file as a scikit-learn model, passing the input and
# output schemas defined earlier in the notebook.
model = wl.upload_model(
    'sklearn-svm-pca',
    'models/model-auto-conversion_sklearn_svm_pca_pipeline.pkl',
    framework=Framework.SKLEARN,
    input_schema=input_schema,
    output_schema=output_schema,
)

# Show the uploaded model's details as the cell output.
model

Waiting for model conversion... It may take up to 10.0min.
Model is Pending conversion..Converting..Pending conversion..Converting.........Ready.


{'name': 'sklearn-svm-pca', 'version': '1a0638bc-a5df-49f6-9b8c-34a2eeccf113', 'file_name': 'model-auto-conversion_sklearn_svm_pca_pipeline.pkl', 'image_path': 'proxy.replicated.com/proxy/wallaroo/ghcr.io/wallaroolabs/mlflow-deploy:v2023.3.0-main-3466', 'last_update_time': datetime.datetime(2023, 7, 5, 15, 11, 14, 52542, tzinfo=tzutc())}

## Configure model and pipeline

In [6]:
# Deployment resources: 0.25 CPU and 1Gi of memory.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(0.25)
    .memory('1Gi')
    .build()
)

In [7]:
# Build the pipeline and add the uploaded model as a step.
# The name is a plain string literal: it has no placeholders, so the
# f-string prefix was unnecessary.
pipeline_name = "sklearn-svm-pca-pipeline"
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

# Deploy with the deployment configuration, then show the pipeline status
# as the cell output.
pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

Waiting for deployment - this will take up to 90s ........... ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.244.12.5',
   'name': 'engine-5d69f9c58d-rwjhw',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'sklearn-svm-pca-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'name': 'sklearn-svm-pca',
      'version': '1a0638bc-a5df-49f6-9b8c-34a2eeccf113',
      'sha': '524b05d22f13fa4ce5feaf07b86710b447f0c80a02601be86ee5b6bc748fe7fd',
      'status': 'Running'}]}}],
 'engine_lbs': [{'ip': '10.244.15.5',
   'name': 'engine-lb-584f54c899-w94f6',
   'status': 'Running',
   'reason': None,
   'details': []}],
 'sidekicks': [{'ip': '10.244.0.211',
   'name': 'engine-sidekick-sklearn-svm-pca-117-5bdbd47dd6-r7vdb',
   'status': 'Running',
   'reason': None,
   'details': [],
   'statuses': '\n'}]}

## Inference

In [8]:
# Load the sample rows and show them as read from disk.
data = pd.read_json('data/test-sklearn-kmeans.json')
display(data)

# Collapse each of the first two rows into a single list so the frame has one
# "inputs" column, matching the single-field input schema.
dataframe = pd.DataFrame({"inputs": data.head(2).to_numpy().tolist()})
display(dataframe)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2


Unnamed: 0,inputs
0,"[5.1, 3.5, 1.4, 0.2]"
1,"[4.9, 3.0, 1.4, 0.2]"


In [9]:
# Run inference on the prepared DataFrame; the result is shown as the cell output.
pipeline.infer(dataframe)

Unnamed: 0,time,in.inputs,out.predictions,check_failures
0,2023-07-05 15:11:29.776,"[5.1, 3.5, 1.4, 0.2]",0,0
1,2023-07-05 15:11:29.776,"[4.9, 3.0, 1.4, 0.2]",0,0


In [10]:
# Undeploy the pipeline at the end of the example.
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s ..................................... ok


0,1
name,sklearn-svm-pca-pipeline
created,2023-07-05 15:11:17.606017+00:00
last_updated,2023-07-05 15:11:17.641020+00:00
deployed,False
tags,
versions,"a12c7fe4-e1ac-4783-a310-b51bd68d86d6, e69307a8-976d-477c-b810-96f60337b893"
steps,sklearn-svm-pca
