In [None]:
%load_ext autoreload
%autoreload 2
import clipper_manager as cl
import os
import pandas as pd
import numpy as np
from sklearn import linear_model as lm
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.externals import joblib
import sys
import json


## Connect to EC2

In [None]:
# Connection settings for the EC2 machine that hosts Clipper.
# `ec2_host` must be defined before the Cluster handle is built; it is read
# from the environment so that no hostname is hard-coded in the notebook.
ec2_host = os.environ.get("CLIPPER_EC2_HOST", "")
if not ec2_host:
    raise ValueError(
        "ec2_host is not set: export CLIPPER_EC2_HOST before starting Jupyter "
        "or assign ec2_host in this cell"
    )
user = "ubuntu"
# Private key passed to the cluster handle together with the login user.
key = os.path.expanduser("~/.ssh/aws_rsa")
clipper = cl.Cluster(ec2_host, user, key)

## Start Clipper

In [None]:
# Start Clipper through the cluster handle created in the "Connect to EC2" cell.
clipper.start_clipper()

In [None]:
# Pretty-print the metrics reported by Clipper as indented JSON.
# print() is called as a function so the cell runs on Python 2 and Python 3.
print(json.dumps(clipper.get_metrics(), indent=4))

## Start a serving workload

Switch to a [different notebook](run_serving_workload.ipynb) to start querying the model from a separate process.

# Train a Scikit-Learn model

In [None]:
def load_digits(digits_location, digits_filename="train.data", norm=True):
    """Load a headerless, comma-separated file of labelled rows.

    The first column of every row is returned as the label and the remaining
    columns as the feature vector.

    Args:
        digits_location: directory that contains the data file.
        digits_filename: name of the file inside ``digits_location``.
        norm: when True, shift every feature column to zero mean and scale it
            to unit variance. The statistics are computed from the loaded
            file itself. Columns with zero variance are centred but left
            unscaled (divided by 1) to avoid division by zero.

    Returns:
        A ``(Z, y)`` tuple: the feature matrix (standardized when ``norm`` is
        True, raw otherwise) and the label vector.
    """
    digits_path = os.path.join(digits_location, digits_filename)
    print("Source file: %s" % digits_path)
    df = pd.read_csv(digits_path, sep=",", header=None)
    data = df.values
    print("Number of image files: %d" % len(data))
    y = data[:, 0]
    X = data[:, 1:]
    if not norm:
        return X, y
    mu = np.mean(X, 0)
    sigma = np.var(X, 0)
    # Standard deviation per column, with 1.0 substituted for constant columns.
    scale = np.where(sigma > 0, np.sqrt(sigma), 1.)
    return (X - mu) / scale, y

def filter_data(data, positive_label=3, negative_label=6):
    """Reduce a multi-class dataset to a two-class problem.

    Rows whose label equals ``positive_label`` are kept with target 1.0, rows
    whose label equals ``negative_label`` are kept with target 0.0, and all
    other rows are dropped. Row order is preserved.

    Args:
        data: ``(features, labels)`` pair; ``features`` is a 2-D array with
            one row per entry of ``labels``.
        positive_label: label value mapped to 1.0 (default 3).
        negative_label: label value mapped to 0.0 (default 6).

    Returns:
        ``(binary_x, binary_y)`` as NumPy arrays. When no row matches,
        ``binary_x`` keeps its feature dimension (shape ``(0, n_features)``).
    """
    cx, cy = data
    features = np.asarray(cx)
    labels = np.asarray(cy)
    is_positive = labels == positive_label
    keep = is_positive | (labels == negative_label)
    # Boolean-mask selection copies the kept rows in their original order.
    return features[keep], is_positive[keep].astype(np.float64)
            
# Directory that holds the MNIST files (train.data / test.data).
MNIST_DIR = os.path.expanduser("~/model-serving/data/mnist_data")

# Build the 3-vs-6 binary problem for both splits.
# NOTE: load_digits standardizes each file with that file's own mean and
# variance, so the test split is not scaled with the training statistics.
train_x, train_y = filter_data(load_digits(MNIST_DIR))
test_x, test_y = filter_data(load_digits(MNIST_DIR, digits_filename="test.data"))

## Train a bad model

In [None]:
# Deliberately weak baseline: two trees of depth one.
# random_state is fixed so the model (and the score shown below) is the same
# on every Restart & Run All.
rf_model = RFC(n_estimators=2, max_depth=1, random_state=0)
rf_model.fit(train_x, train_y)
# Mean accuracy on the held-out split; displayed as the cell output.
rf_model.score(test_x, test_y)

## Deploy a bad model

In [None]:
# Hand the fitted random forest to Clipper under the name "bad_rf_model".
clipper.add_sklearn_model("bad_rf_model", rf_model)

## Train a Spark Model

Now let's go train a model using Spark in a Databricks Cloud [Notebook](https://amplab-berkeley-research.cloud.databricks.com/#notebook/46987).

## Deploy a Spark Model from S3

In [None]:
# Add the PySpark model stored at the given S3 location under the name "spark_svm".
# NOTE(review): presumably the cluster needs read access to this bucket — confirm.
clipper.add_pyspark_model("spark_svm", "s3://clipperdbdemo/svm_3_v_6_classifier/svm_predict_3")

## Train an SVM with RBF Kernel

In [None]:
# Support-vector classifier with the RBF kernel; "rbf" is SVC's default and is
# spelled out here to match the section title. fit() returns the estimator.
svm_model = svm.SVC(kernel="rbf").fit(train_x, train_y)
# Mean accuracy on the held-out split; displayed as the cell output.
svm_model.score(test_x, test_y)

In [None]:
# Hand the fitted SVM to Clipper under the name "rbf_svm_model".
clipper.add_sklearn_model("rbf_svm_model", svm_model)

In [None]:
# Pretty-print the correction model returned for id 0 as indented JSON.
# print() is called as a function so the cell runs on Python 2 and Python 3.
print(json.dumps(clipper.get_correction_model(0), indent=4))

## Send some corrections

Switch to a [different notebook](send_updates.ipynb) to send more training data to Clipper.

## Stop Clipper

In [None]:
# Stop Clipper via the cluster handle.
# NOTE(review): exactly what stop_all() tears down is defined in clipper_manager — confirm.
clipper.stop_all()

In [None]:
# Logistic-regression classifier on the same 3-vs-6 split, with default
# hyper-parameters. fit() returns the estimator itself.
lm_model = lm.LogisticRegression().fit(train_x, train_y)
# Mean accuracy on the held-out split; displayed as the cell output.
lm_model.score(test_x, test_y)

In [None]:
# Request three replicas of the model named "bad_rf_model".
# NOTE(review): the second positional argument (1) is presumably the model
# version — confirm against clipper_manager.add_replicas.
# NOTE(review): this cell sits after clipper.stop_all(); run it while Clipper
# is still up.
clipper.add_replicas("bad_rf_model", 1, num_replicas=3)