In [None]:
%load_ext autoreload
%autoreload 2
import clipper_manager as cl
import os
import pandas as pd
import numpy as np
from sklearn import linear_model as lm
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.externals import joblib
import sys
import json


# Train a Scikit-Learn model

In [None]:
def load_digits(digits_location, digits_filename = "train.data", norm=True):
    """Load a header-less, comma-separated digits file and optionally standardize it.

    Every row is read as ``label, feature_1, ..., feature_n``: column 0 becomes
    the label vector and the remaining columns become the feature matrix.

    Args:
        digits_location: Directory that contains the data file.
        digits_filename: Name of the file inside ``digits_location``.
        norm: When True, each feature column is shifted by its mean and divided
            by its standard deviation. Columns with zero variance are only
            shifted (their divisor is 1), so constant features never cause a
            division by zero.

    Returns:
        A ``(Z, y)`` tuple: ``Z`` is the (optionally standardized) feature
        matrix and ``y`` the label column.

    Note:
        The mean and standard deviation are computed from the file being
        loaded, so separate files are each scaled by their own statistics.
    """
    # os.path.join copes with a trailing separator on digits_location.
    digits_path = os.path.join(digits_location, digits_filename)
    print("Source file: %s" % digits_path)
    df = pd.read_csv(digits_path, sep=",", header=None)
    data = df.values
    print("Number of image files: %d" % len(data))
    y = data[:, 0]
    X = data[:, 1:]
    if not norm:
        return X, y
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    # Guard against constant columns: divide by 1 wherever the spread is zero.
    scale = np.where(sigma > 0, sigma, 1.)
    return (X - mu) / scale, y

def filter_data(data, pos_label=3, neg_label=6):
    """Reduce a multi-class dataset to a binary one.

    Rows labelled ``pos_label`` are kept with target 1.0, rows labelled
    ``neg_label`` are kept with target 0.0, and every other row is dropped.
    The relative order of the surviving rows is preserved.

    Args:
        data: ``(features, labels)`` pair; ``features`` is a 2-D array with one
            row per sample and ``labels`` a 1-D array of the same length.
        pos_label: Original label mapped to 1.0 (defaults to digit 3).
        neg_label: Original label mapped to 0.0 (defaults to digit 6).

    Returns:
        ``(binary_x, binary_y)`` as numpy arrays. When no row matches,
        ``binary_x`` is empty but keeps its feature dimension (shape
        ``(0, n_features)``).
    """
    cx, cy = data
    features = np.asarray(cx)
    labels = np.asarray(cy)
    is_pos = labels == pos_label
    keep = is_pos | (labels == neg_label)
    # Boolean-mask selection keeps row order and avoids a Python-level loop.
    return features[keep], np.where(is_pos[keep], 1.0, 0.0)
            
# Both splits live in the same directory; only the file name differs.
mnist_dir = os.path.expanduser("~/model-serving/data/mnist_data")
train_x, train_y = filter_data(load_digits(mnist_dir))
test_x, test_y = filter_data(
    load_digits(mnist_dir, digits_filename="test.data")
)

## Train a bad model

In [None]:
# Deliberately weak baseline: two depth-1 trees. The seed is fixed so the
# reported score is reproducible across kernel restarts.
rf_model = RFC(n_estimators=2, max_depth=1, random_state=0)
rf_model.fit(train_x, train_y)
# Last expression of the cell: displays the accuracy on the held-out split.
rf_model.score(test_x, test_y)

## Connect to EC2

In [46]:
# Connection settings for the EC2 instance that hosts Clipper.
# `cl` (clipper_manager) and `os` come from the import cell at the top of the notebook.
ec2_host = "ec2-54-193-105-129.us-west-1.compute.amazonaws.com"
user = "ubuntu"
key = os.path.expanduser("~/.ssh/aws_rsa")  # path to the SSH private key
clipper = cl.Cluster(ec2_host, user, key)

Checking if Docker running...
Found Docker running
Creating internal Docker network
Creating local model repository


## Start Clipper

In [53]:
# Start Clipper on the cluster with its default settings; the output below
# echoes the configuration used and the docker commands that were run.
clipper.start_clipper()

starting Clipper with default settings:
models = []
redis_port = 6379
use_lsh = false
window_size = -1
name = "clipper-demo"
input_type = "float"
num_update_workers = 1
num_predict_workers = 1
redis_ip = "redis-clipper"
cache_size = 49999
slo_micros = 20000
correction_policy = "logistic_regression"
input_length = 784
[batching]
sample_size = 1000
strategy = "aimd"

[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] sudo: docker run -d --network=clipper_nw -p 6379:6379 --cpuset-cpus="0" --name redis-clipper redis:alpine
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] sudo: docker run -d --network=clipper_nw -p 1337:1337 --cpuset-cpus="1-4" --name clipper -v ~/conf.toml:/tmp/conf.toml dcrankshaw/clipper


In [54]:
# Pretty-print Clipper's current metrics. The call form of print works on
# both Python 2 and Python 3.
print(json.dumps(clipper.get_metrics(), indent=4))

{
    "meters": [
        {
            "rate": 0.0, 
            "name": "prediction_thruput", 
            "unit": "events per second"
        }, 
        {
            "rate": 0.0, 
            "name": "update_thruput", 
            "unit": "events per second"
        }
    ], 
    "histograms": [
        {
            "std": 0.0, 
            "p99": 0.0, 
            "name": "prediction_latency", 
            "min": 0, 
            "max": 0, 
            "p95": 0.0, 
            "p50": 0.0, 
            "mean": 0.0
        }, 
        {
            "std": 0.0, 
            "p99": 0.0, 
            "name": "in_time_predictions", 
            "min": 0, 
            "max": 0, 
            "p95": 0.0, 
            "p50": 0.0, 
            "mean": 0.0
        }, 
        {
            "std": 0.0, 
            "p99": 0.0, 
            "name": "update_latency", 
            "min": 0, 
            "max": 0, 
            "p95": 0.0, 
            "p50": 0.0, 
            "mean": 0.0
        

## Deploy A Bad Model

In [55]:
# Deploy the deliberately weak random forest to Clipper as "bad_rf_model".
clipper.add_sklearn_model("bad_rf_model", rf_model)

/tmp/clipper-models/bad_rf_model/1
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] sudo: docker run -d --network=clipper_nw --name bad_rf_model_v1_r0 -v /tmp/clipper-models/bad_rf_model/1/bad_rf_model:/model:ro dcrankshaw/clipper-sklearn-mw


## Start a serving workload

Now switch to a [different notebook](run_serving_workload.ipynb) to start querying the model from a separate process.

In [None]:
# Add a PySpark model named "spark_svm" from the given S3 location.
# NOTE(review): presumably the path holds a saved Spark SVM for the 3-vs-6 task - confirm.
clipper.add_pyspark_model("spark_svm", "s3://clipperdbdemo/svm_3_v_6_classifier/svm_predict_3")

## Train an SVM with RBF Kernel

In [None]:
# RBF is SVC's default kernel; it is spelled out here to match the section title.
svm_model = svm.SVC(kernel="rbf").fit(train_x, train_y)
# Last expression of the cell: displays the accuracy on the held-out split.
svm_model.score(test_x, test_y)

In [None]:
# Deploy the trained SVM to Clipper as "rbf_svm_model".
clipper.add_sklearn_model("rbf_svm_model", svm_model)

In [None]:
# Pretty-print the correction model Clipper returns for id 0.
# NOTE(review): what the argument 0 identifies is not visible in this notebook - confirm.
print(json.dumps(clipper.get_correction_model(0), indent=4))

## Send some corrections

Now switch to a [different notebook](send_updates.ipynb) to send more training data to Clipper.

## Stop Clipper

In [52]:
# Stop Clipper together with all running models (see the message printed below).
clipper.stop_all()

Stopping Clipper and all running models...


In [None]:
# Fit a logistic-regression classifier on the same binary split.
lm_model = lm.LogisticRegression().fit(train_x, train_y)
# Last expression of the cell: displays the accuracy on the held-out split.
lm_model.score(test_x, test_y)

In [56]:
# Add three more replicas of the deployed "bad_rf_model".
# NOTE(review): the positional 1 is presumably the model version (containers are named *_v1_*) - confirm.
clipper.add_replicas("bad_rf_model", 1, num_replicas=3)
# Pretty-print the metrics afterwards; the call form of print works on both
# Python 2 and Python 3.
print(json.dumps(clipper.get_metrics(), indent=4))

Adding 3 replicas of model: bad_rf_model
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] run: stat /tmp/clipper-models/bad_rf_model/1
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out:   File: ‘/tmp/clipper-models/bad_rf_model/1’
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out:   Size: 4096      	Blocks: 8          IO Block: 4096   directory
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: Device: ca01h/51713d	Inode: 524939      Links: 3
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: Access: (0775/drwxrwxr-x)  Uid: ( 1000/  ubuntu)   Gid: ( 1000/  ubuntu)
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: Access: 2016-08-24 18:41:57.955033000 +0000
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: Modify: 2016-08-24 19:04:39.503848387 +0000
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: Change: 2016-08-24 19:04:39.503848387 +0000
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out:  Birth: -
[ec2-54-193-105-129.us-








[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] sudo: docker run -d --network=clipper_nw --name bad_rf_model_v1_r1 -v /tmp/clipper-models/bad_rf_model/1/bad_rf_model:/model:ro dcrankshaw/clipper-sklearn-mw
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: 6518d86e594cd1612b8199ffb7b75314cd46d808b3ce0c081356828c45b69237
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: 

[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] sudo: docker stop bad_rf_model_v1_r2
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: Error response from daemon: No such container: bad_rf_model_v1_r2
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: 

[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] sudo: docker rm bad_rf_model_v1_r2
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: Error response from daemon: No such container: bad_rf_model_v1_r2
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: 









[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] sudo: docker run -d --network=clipper_nw --name bad_rf_model_v1_r2 -v /tmp/clipper-models/bad_rf_model/1/bad_rf_model:/model:ro dcrankshaw/clipper-sklearn-mw
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: 968ac462f96c390a1124b8a192946c24584b9a7fed82ca691ea1a08dfe98427f
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: 

[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] sudo: docker stop bad_rf_model_v1_r3
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: Error response from daemon: No such container: bad_rf_model_v1_r3
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: 

[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] sudo: docker rm bad_rf_model_v1_r3
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: Error response from daemon: No such container: bad_rf_model_v1_r3
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: 









[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] sudo: docker run -d --network=clipper_nw --name bad_rf_model_v1_r3 -v /tmp/clipper-models/bad_rf_model/1/bad_rf_model:/model:ro dcrankshaw/clipper-sklearn-mw
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: be4b73c0055c37301fac4dfebfd8a84825cebfee1c63541ce234a311329f322f
[ec2-54-193-105-129.us-west-1.compute.amazonaws.com] out: 

{
    "meters": [
        {
            "rate": 0.0, 
            "name": "prediction_thruput", 
            "unit": "events per second"
        }, 
        {
            "rate": 0.0, 
            "name": "update_thruput", 
            "unit": "events per second"
        }, 
        {
            "rate": 0.0, 
            "name": "bad_rf_model:model_thruput", 
            "unit": "events per second"
        }
    ], 
    "histograms": [
        {
            "std": 0.0, 
            "p99": 0.0, 
            "name": "prediction_latency", 
            "min": 0, 
            "max": 0, 
            "p9

In [51]:
# Re-create the cluster handle from the connection settings (ec2_host, user, key)
# defined in the "Connect to EC2" cell.
clipper = cl.Cluster(ec2_host, user, key)

Checking if Docker running...
Found Docker running
Creating internal Docker network
Creating local model repository
