## Explore the Environment

### Setup Environment

In [None]:
!cd ~ && git clone --recursive https://github.com/fluxcapacitor/pipeline.io

### Explore Environment

In [None]:
!/root/pipeline.io/bin/pipeline-context-switch-aws-training.sh

In [None]:
!kubectl get pod

### [Training Cluster](http://kubernetes.demo.pipeline.io/)

In [None]:
!/root/pipeline.io/bin/pipeline-context-switch-aws-predictions.sh

In [None]:
!kubectl get pod

### [Prediction Cluster - AWS](http://kubernetes-aws.demo.pipeline.io/)

In [None]:
!/root/pipeline.io/bin/pipeline-context-switch-gcp-predictions.sh

In [None]:
!kubectl get pod

### [Prediction Cluster - GCP](http://kubernetes-gcp.demo.pipeline.io/)

## Generate Spark ML Decision Tree

### Scale Out Spark Cluster

In [None]:
!/root/pipeline.io/bin/pipeline-context-switch-aws-training.sh

In [None]:
!/root/pipeline.io/bin/pipeline-deployment-scale-out-training.sh

In [None]:
!kubectl get pod

### [Spark Admin](http://spark.demo.pipeline.io)

### [Training Cluster](http://kubernetes.demo.pipeline.io/)

### Setup SparkSession

In [None]:
from pyspark.sql import SparkSession

# Obtain the SparkSession used by every Spark cell below; getOrCreate() is
# called on the default builder with no extra configuration.
sparkSession = (
    SparkSession
    .builder
    .getOrCreate()
)

### Load Training Dataset from S3 into Spark

In [None]:
# Read the census CSV from S3. The file's first row is used as the header,
# and Spark is asked to infer the column types.
data = (
    sparkSession.read
    .format("csv")
    .option("inferSchema", "true")
    .option("header", "true")
    .load("s3a://datapalooza/R/census.csv")
)

# Display the first row as a quick check of the load.
data.head()

### Build Spark ML Pipeline with Decision Tree Classifier

In [None]:
from pyspark.ml import Pipeline
from pyspark.ml.classification import DecisionTreeClassifier
from pyspark.ml.feature import RFormula

# Stage 1: R-style formula with "income" as the label and "." standing for
# all remaining columns as features.
formula = RFormula(formula="income ~ .")

# Stage 2: decision tree classifier, left at its default parameters.
classifier = DecisionTreeClassifier()

pipeline = Pipeline(stages=[formula, classifier])

# Fit both stages on the DataFrame loaded in the previous section.
pipelineModel = pipeline.fit(data)

print(pipelineModel)

In [None]:
# The classifier is the second stage of the pipeline (index 1); print the
# text description of its fitted model.
treeModel = pipelineModel.stages[1]
print(treeModel.toDebugString)

## Convert Spark ML Pipeline to PMML

In [None]:
from jpmml import toPMMLBytes

# Convert the fitted pipeline to PMML. The result is kept as bytes because
# the deployment cells below send and write it in that form.
pmmlBytes = toPMMLBytes(
    sparkSession,
    data,
    pipelineModel,
)

# Show the PMML document as text.
pmmlBytes.decode("utf-8")

## Deployment Option 1:  Mutable Model Deployment

### Deploy New Model to Live, Running Model Server

In [None]:
import urllib.request

update_url = 'http://prediction-aws.demo.pipeline.io/update-pmml/census'

update_headers = {'Content-type': 'application/xml'}

# Passing `data` makes urllib send a POST with the PMML bytes as the body.
req = urllib.request.Request(update_url, headers=update_headers, data=pmmlBytes)

# Open the response as a context manager so the underlying HTTP connection
# is closed as soon as the status has been read.
with urllib.request.urlopen(req) as resp:
    print(resp.status) # Should return Http Status 200

### Test New Model on Live, Running Model Server

In [None]:
import urllib.parse
import urllib.request  # imported here so this cell does not rely on an earlier cell's import
import json

evaluate_url = 'http://prediction-aws.demo.pipeline.io/evaluate-pmml/census'

evaluate_headers = {'Content-type': 'application/json'}

# A single census record, sent as the JSON request body.
input_params = '{"age":39,"workclass":"State-gov","education":"Bachelors","education_num":13,"marital_status":"Never-married","occupation":"Adm-clerical","relationship":"Not-in-family","race":"White","sex":"Male","capital_gain":2174,"capital_loss":0,"hours_per_week":40,"native_country":"United-States"}'
encoded_input_params = input_params.encode('utf-8')

# Passing `data` makes urllib send a POST with the encoded JSON as the body.
req = urllib.request.Request(evaluate_url, headers=evaluate_headers, data=encoded_input_params)

# Open the response as a context manager so the underlying HTTP connection
# is closed once the body has been read.
with urllib.request.urlopen(req) as resp:
    print(resp.read()) # Should return valid classification with probabilities

## Deployment Option 2:  Immutable Model Deployment

### Save Model to Disk

In [None]:
with open('/root/census.pmml', 'wb') as f:
  f.write(pmmlBytes)

!cat /root/census.pmml

### Commit to GitHub

In [None]:
# Note: the push script below is intentionally left commented out. You may
# need to run it from a terminal instead of this notebook so that your
# GitHub credentials can be set.
#!/root/datasticks-github-push.sh

### Deploy New Model Server with New Model from GitHub

In [None]:
!/root/pipeline.io/bin/pipeline-context-switch-aws-predictions.sh

In [None]:
!kubectl get pod

## Deploy to Google Cloud Cluster! 

In [None]:
!/root/pipeline.io/bin/pipeline-context-switch-gcp-predictions.sh

In [None]:
!kubectl get pod

## Load Test Predictions Across AWS and Google Cloud

In [None]:
from IPython.display import display, HTML, IFrame

# Hystrix dashboard monitor page. The `streams` query parameter is a
# URL-encoded JSON list with one entry named "Circuit Breakers" whose stream
# is turbine.demo.pipeline.io/turbine.stream.
dashboard_url = 'http://hystrix.demo.pipeline.io/hystrix-dashboard/monitor/monitor.html?streams=%5B%7B%22name%22%3A%22Circuit%20Breakers%22%2C%22stream%22%3A%22turbine.demo.pipeline.io%2Fturbine.stream%22%2C%22auth%22%3A%22%22%2C%22delay%22%3A%22%22%7D%5D'

# IFrame emits a properly closed <iframe>...</iframe> element. A hand-written
# tag without the closing </iframe> is malformed HTML and can swallow the
# output that follows it.
display(IFrame(src=dashboard_url, width='100%', height='100px'))

In [None]:
!/root/pipeline.io/bin/pipeline-context-switch-aws-training.sh

## Scale Out Model Servers

### AWS

In [None]:
!/root/pipeline.io/bin/pipeline-context-switch-aws-predictions.sh

In [None]:
!kubectl get pod

In [None]:
!/root/pipeline.io/bin/pipeline-deployment-scale-out-predictions.sh

In [None]:
!kubectl get pod

### Google

In [None]:
!/root/pipeline.io/bin/pipeline-context-switch-gcp-predictions.sh

In [None]:
!kubectl get pod

In [None]:
!/root/pipeline.io/bin/pipeline-deployment-scale-out-predictions.sh

In [None]:
!kubectl get pod

## Scale In and Cleanup

### Training - Spark

In [None]:
!/root/pipeline.io/bin/pipeline-context-switch-aws-training.sh

In [None]:
!kubectl get pod

In [None]:
!/root/pipeline.io/bin/pipeline-deployment-scale-in-training.sh

In [None]:
!kubectl get pod

### AWS

In [None]:
!/root/pipeline.io/bin/pipeline-context-switch-aws-predictions.sh

In [None]:
!kubectl get pod

In [None]:
!/root/pipeline.io/bin/datasticks-deployment-scale-in-predictions.sh

In [None]:
!kubectl get pod

### Google

In [None]:
!/root/pipeline.io/bin/pipeline-context-switch-gcp-predictions.sh

In [None]:
!kubectl get pod

In [None]:
!/root/pipeline.io/bin/pipeline-deployment-scale-in-predictions.sh

In [None]:
!kubectl get pod