In [1]:
import sys
import numpy as np
import pandas as pd
import mmlspark
# Reference (MMLSpark sample notebook): https://github.com/Azure/mmlspark/blob/master/notebooks/samples/108%20-%20Model%20Deployment%20with%20Spark%20Serving.ipynb

In [2]:
# Fetch the Adult Census Income CSV (only if it is not already on disk), load it
# into a Spark DataFrame, keep the four columns used below, and split it into
# training and test sets.
import os
import urllib.request  # plain `import urllib` does not load the `request` submodule

dataFilePath = "AdultCensusIncome.csv"
if not os.path.isfile(dataFilePath):
    # The file is saved under the same relative name, so later runs skip the download.
    urllib.request.urlretrieve("https://mmlspark.azureedge.net/datasets/" + dataFilePath, dataFilePath)

# NOTE(review): `spark` is not created in the visible cells; presumably the
# SparkSession injected by the kernel/runtime -- confirm.
# The column names used here start with a space; " hours-per-week" is read as float64.
data = spark.createDataFrame(pd.read_csv(dataFilePath, dtype={" hours-per-week": np.float64}))
data = data.select([" education", " marital-status", " hours-per-week", " income"])

# 75/25 split with a fixed seed.
train, test = data.randomSplit([0.75, 0.25], seed=123)
train.limit(10).toPandas()

In [3]:
from pyspark.ml.classification import LogisticRegression
from mmlspark import TrainClassifier

# Wrap a Spark ML LogisticRegression in MMLSpark's TrainClassifier, using the
# " income" column as the label, and fit it on the training split.
model = TrainClassifier(
    model=LogisticRegression(),
    labelCol=" income",
    numFeatures=256,
).fit(train)

In [4]:
from mmlspark import ComputeModelStatistics
from mmlspark import TrainedClassifierModel

# Apply the fitted model to the held-out split and print the schema of the
# resulting DataFrame to see which columns the transform produced.
prediction = model.transform(test)
prediction.printSchema()

In [5]:
# Run MMLSpark's ComputeModelStatistics over the scored test set.
metrics = (
    ComputeModelStatistics()
    .transform(prediction)
)
# Show (at most) the first 10 rows of the result as a pandas DataFrame.
metrics.limit(10).toPandas()

In [6]:
import uuid
from pyspark.sql.functions import col, from_json
from pyspark.sql.types import *

# Streaming source: a Spark Serving reader bound to localhost:8899 under the
# API name "my_api". The "value" column of each incoming row is parsed as JSON
# using the test DataFrame's schema, and the parsed fields are flattened next
# to the "id" column.
serving_inputs = (
    spark.readStream.server()
    .address("localhost", 8899, "my_api")
    .load()
    .withColumn("variables", from_json(col("value"), test.schema))
    .select("id", "variables.*")
)

# Score the streaming rows with the trained model; the "scored_labels" column
# is cast to string.
serving_outputs = model.transform(serving_inputs).withColumn(
    "scored_labels", col("scored_labels").cast("string")
)

# Streaming sink: start the query named "my_query" for the "my_api" server,
# with "scored_labels" configured as the reply column. Every start uses a fresh,
# uuid-suffixed checkpoint directory.
server = (
    serving_outputs.writeStream
    .server()
    .option("name", "my_api")
    .queryName("my_query")
    .option("replyCol", "scored_labels")
    .option("checkpointLocation", "checkpoints-{}".format(uuid.uuid1()))
    .start()
)

In [7]:
# Bare expression: displays the repr of the streaming input DataFrame built
# from `spark.readStream` in the serving cell.
serving_inputs

In [8]:
import requests

# Send one JSON record to the serving endpoint and print the reply body.
# The keys keep the leading space used by the training columns.
# NOTE: this rebinds `data` to a string; the data-loading cell used the same
# name for the Spark DataFrame.
data = '{" education":" 10th"," marital-status":" Divorced"," hours-per-week":40.0}'
# Without a timeout, requests waits indefinitely and the cell can hang the
# kernel if the endpoint never answers.
r = requests.post(url="http://localhost:8899/my_api", data=data, timeout=60)
print("Response {}".format(r.text))

In [9]:
import requests

# Send a second JSON record (different education / marital-status values) to
# the serving endpoint and print the reply body.
# NOTE: this rebinds `data` to a string; the data-loading cell used the same
# name for the Spark DataFrame.
data = '{" education":" Masters"," marital-status":" Married-civ-spouse"," hours-per-week":40.0}'
# Without a timeout, requests waits indefinitely and the cell can hang the
# kernel if the endpoint never answers.
r = requests.post(url="http://localhost:8899/my_api", data=data, timeout=60)
print("Response {}".format(r.text))

In [10]:
# Pause for 20 seconds, then stop the streaming query started as `server`.
# NOTE(review): the original comment says the pause lets the server finish
# setting up, but it runs after the request cells above, so it cannot help
# those requests. Presumably it only delays shutdown -- confirm the intent.
import time
time.sleep(20) # wait for server to finish setting up (just to be safe)
server.stop()