In [None]:
%load_ext autoreload
%autoreload 2
import clipper_manager as cl
import os
import pandas as pd
import numpy as np
from sklearn import linear_model as lm
from sklearn import svm
# import sklearn.ensemble.RandomForestClassifier as RFC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.externals import joblib
import sys
ec2_host = "ec2-54-67-0-164.us-west-1.compute.amazonaws.com"
user = "ubuntu"
key = os.path.expanduser("~/.ssh/aws_rsa")

# Train a Scikit-Learn model

In [None]:
def load_digits(digits_location, digits_filename = "train.data", norm=True):
    """Load a header-less CSV of digits and split it into features and labels.

    The first column of every row is taken as the label; the remaining
    columns are the features.

    Parameters
    ----------
    digits_location : str
        Directory that contains the data file.
    digits_filename : str
        Name of the CSV file inside ``digits_location``.
    norm : bool
        When true, standardize every feature column to zero mean and unit
        standard deviation, using statistics computed from this file only.

    Returns
    -------
    (features, labels) : tuple of numpy arrays
        ``features`` is the (optionally standardized) feature matrix and
        ``labels`` is the first column of the file.
    """
    digits_path = os.path.join(digits_location, digits_filename)
    print("Source file: %s" % digits_path)
    df = pd.read_csv(digits_path, sep=",", header=None)
    data = df.values
    print("Number of image files: %d" % len(data))
    y = data[:, 0]
    X = data[:, 1:]
    if not norm:
        return X, y

    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    # Columns with no spread would divide by zero; dividing by 1 instead
    # leaves them at their centered value of 0.
    safe_sigma = np.where(sigma > 0, sigma, 1.0)
    return (X - mu) / safe_sigma, y

def filter_data(data, pos_label=3, neg_label=6):
    """Reduce a multi-class dataset to a two-class problem.

    Parameters
    ----------
    data : tuple
        ``(features, labels)`` pair; ``features`` is indexed by row and
        ``labels`` holds one class label per row.
    pos_label : scalar
        Rows with this label are kept and relabeled ``1.0``.
    neg_label : scalar
        Rows with this label are kept and relabeled ``0.0``.

    Returns
    -------
    (binary_x, binary_y) : tuple of numpy arrays
        The kept feature rows in their original order, and a float array of
        the matching 1.0 / 0.0 labels. When no row matches, ``binary_x``
        keeps its column dimension (shape ``(0, n_features)``).
    """
    cx, cy = data
    cx = np.asarray(cx)
    cy = np.asarray(cy)
    is_pos = cy == pos_label
    is_neg = cy == neg_label
    keep = is_pos | is_neg
    # Within the kept rows, a True in is_pos means the positive class.
    return cx[keep], is_pos[keep].astype(float)
            
# Build the binary train and test sets from the two files in the MNIST data directory.
# NOTE(review): load_digits standardizes each file with its own mean and variance,
# so the test split is not scaled with the training statistics.
train_x, train_y = filter_data(
    load_digits(
        os.path.expanduser("~/model-serving/data/mnist_data"),
        digits_filename="train.data",
    )
)
test_x, test_y = filter_data(
    load_digits(
        os.path.expanduser("~/model-serving/data/mnist_data"),
        digits_filename="test.data",
    )
)

## Train a bad model

In [None]:
# Intentionally weak baseline: two trees, each limited to depth 2.
# random_state is pinned so the fitted forest and its score are the same on every run.
rf_model = RFC(n_estimators=2, max_depth=2, random_state=0)
rf_model.fit(train_x, train_y)
# Mean accuracy on the test split; shown as the cell's output.
rf_model.score(test_x, test_y)

In [None]:
# Logistic regression with scikit-learn's default settings, fitted on the training split.
lm_model = lm.LogisticRegression().fit(train_x, train_y)
# Mean accuracy on the test split; shown as the cell's output.
lm_model.score(test_x, test_y)

## Train an SVM with RBF Kernel

In [None]:
# Support vector classifier with scikit-learn's default settings, fitted on the training split.
svm_model = svm.SVC().fit(train_x, train_y)
# Mean accuracy on the test split; shown as the cell's output.
svm_model.score(test_x, test_y)

In [None]:
# Handle used by every cell below to manage the remote host. `cl` is the
# clipper_manager module imported in the first cell, so it is not re-imported here.
clipper = cl.Cluster(ec2_host, user, key)

In [None]:
# Presumably brings up the Clipper services on the remote host - confirm in clipper_manager.
clipper.start_clipper()

In [29]:
# Deploy the random forest under the name "bad_rf_model". The recorded output below
# shows the call creating /tmp/clipper-models/bad_rf_model/1 on the host, then
# stopping, removing and re-running a docker container named bad_rf_model_v1_r0.
clipper.add_sklearn_model("bad_rf_model", rf_model)

/tmp/clipper-models/bad_rf_model/1
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] run: mkdir -p /tmp/clipper-models/bad_rf_model/1
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] sudo: docker stop bad_rf_model_v1_r0
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] sudo: docker rm bad_rf_model_v1_r0
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] sudo: docker run -d --network=clipper_nw --name bad_rf_model_v1_r0 -v /tmp/clipper-models/bad_rf_model/1/bad_rf_model:/model:ro dcrankshaw/clipper-sklearn-mw


In [36]:
# Deploy the SVM under the name "good_svm_model". The recorded output below shows
# the call creating /tmp/clipper-models/good_svm_model/1 on the host, then
# stopping, removing and re-running a docker container named good_svm_model_v1_r0.
clipper.add_sklearn_model("good_svm_model", svm_model)

/tmp/clipper-models/good_svm_model/1
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] run: mkdir -p /tmp/clipper-models/good_svm_model/1
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] sudo: docker stop good_svm_model_v1_r0
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] sudo: docker rm good_svm_model_v1_r0
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] sudo: docker run -d --network=clipper_nw --name good_svm_model_v1_r0 -v /tmp/clipper-models/good_svm_model/1/good_svm_model:/model:ro dcrankshaw/clipper-sklearn-mw


In [32]:
# Reuses the existing `clipper` handle. The recorded output below shows the model
# directory being copied from S3 to /tmp/clipper-models/spark_svm/1 on the host
# with `aws s3 cp ... --recursive`.
clipper.add_pyspark_model("spark_svm", "s3://clipperdbdemo/test823/svm_predict_3")

/tmp/clipper-models/spark_svm/1
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] run: mkdir -p /tmp/clipper-models/spark_svm/1
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] run: aws s3 cp s3://clipperdbdemo/test823/svm_predict_3 /tmp/clipper-models/spark_svm/1/svm_predict_3 --recursive
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] out: download: s3://clipperdbdemo/test823/svm_predict_3/data/_SUCCESS to svm_predict_3/data/_SUCCESS
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] out: Completed 1 of 6 part(s) with 5 file(s) remaining
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] out: download: s3://clipperdbdemo/test823/svm_predict_3/data/_metadata to svm_predict_3/data/_metadata
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] out: Completed 2 of 6 part(s) with 4 file(s) remaining
[ec2-54-67-0-164.us-west-1.compute.amazonaws.com] out: download: s3://clipperdbdemo/test823/svm_predict_3/metadata/_SUCCESS to svm_predict_3/metadata/_SUCCESS
[ec2-54-67-0-164.us-west-1.compute.

In [37]:
# Fetch the current metrics. The recorded output below is a JSON document with
# "meters" (throughput rates in events per second) and "histograms" (latency statistics).
# NOTE(review): the result is assigned, so the JSON shown appears to be printed by
# get_metrics itself - confirm what `metrics` actually holds.
metrics = clipper.get_metrics()

{
    "meters": [
        {
            "rate": 45.63718534457216, 
            "name": "prediction_thruput", 
            "unit": "events per second"
        }, 
        {
            "rate": 0.0, 
            "name": "update_thruput", 
            "unit": "events per second"
        }, 
        {
            "rate": 7.191313557036659, 
            "name": "bad_rf_model:model_thruput", 
            "unit": "events per second"
        }, 
        {
            "rate": 11.824179021666044, 
            "name": "spark_svm:model_thruput", 
            "unit": "events per second"
        }, 
        {
            "rate": 47.0744699581482, 
            "name": "good_svm_model:model_thruput", 
            "unit": "events per second"
        }
    ], 
    "histograms": [
        {
            "std": 185.02027008567492, 
            "p99": 20202.39, 
            "name": "prediction_latency", 
            "min": 18012, 
            "max": 20217, 
            "p95": 20182.0, 
            "p50": 2

In [None]:
# Tear-down step; presumably stops everything started through `clipper` above -
# confirm in clipper_manager.
clipper.stop_all()