In [1]:
"""
PySpark scoring with DataRobot Scoring Code
"""
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession, DataFrame, SQLContext, DataFrame
from py4j.java_gateway import java_import
from pyspark.sql.types import *
from pyspark.sql.functions import udf
from datetime import datetime
import sys
# Wall-clock timer for this cell. The elapsed value is kept in `predictionTime`
# so that the MLOps reporting cell can forward it as the scoring time.
# NOTE(review): the interval also covers session lookup, the table query and
# model loading, not only the transform itself.
start_time = datetime.now()

spark = SparkSession(SparkContext.getOrCreate())
# `SparkSession._wrapped` (the legacy SQLContext) is a private attribute that is
# absent on PySpark 3.3+, where DataFrame() accepts the session itself. Fall back
# to the session so the cell runs on both old and new runtimes.
sqlContext = getattr(spark, "_wrapped", spark)

# Go through `spark.sql` instead of a bare `sql()` call: `sql` is neither defined
# nor imported in this notebook, and the other cells already use `spark.sql`.
scoringDF = spark.sql("select * from 10k_lending_club_loans_with_id_csv")

# Make the DataRobot Scoring Code classes reachable through the py4j JVM view.
java_import(spark._jvm, 'com.datarobot.prediction.Predictors')
java_import(spark._jvm, 'com.datarobot.prediction.spark.CodegenModel')
java_import(spark._jvm, 'com.datarobot.prediction.spark.Predictors')

# Look up the Scoring Code model by its model id.
codeGenModel = spark._jvm.com.datarobot.prediction.spark.Predictors.getPredictor('5ed708a8fca6a1433abddbcb')

# transform() operates on the underlying Java DataFrame (`_jdf`); wrap the Java
# object it returns back into a PySpark DataFrame.
result = DataFrame(codeGenModel.transform(scoringDF._jdf), sqlContext)
result.createOrReplaceTempView('lending_club_loans_predictions')

# Run an action so that Spark executes the job before the timer is stopped.
# NOTE(review): head(1) presumably only needs enough work to return one row, so
# this may under-measure full-table scoring time -- confirm the intended metric.
result.head(1)
end_time = datetime.now()

# total_seconds() keeps sub-second precision and does not wrap at one day;
# timedelta.seconds would report 0 for any run shorter than a second.
predictionTime = (end_time - start_time).total_seconds()
print('completed in %s seconds' % predictionTime)

In [2]:
"""
Report prediction metrics with MLOps
"""
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession, DataFrame, SQLContext, DataFrame
from py4j.java_gateway import java_import
from pyspark.sql.types import *
from pyspark.sql.functions import udf

spark = SparkSession(SparkContext.getOrCreate())
# `SparkSession._wrapped` is a private attribute that is absent on PySpark 3.3+;
# fall back to the session so this line cannot abort the cell there.
sqlContext = getattr(spark, "_wrapped", spark)
java_import(spark._jvm, 'com.datarobot.mlops.spark.MLOpsSparkUtils')

# Read the scored rows from the `lending_club_loans_predictions` temp view that
# the scoring cell registers. The previous query selected from the raw input
# table and only *aliased* it with that name, so the frame handed to MLOps was
# the unscored input.
# NOTE(review): assumes the view carries the "PREDICTION" and "id" columns named
# below -- confirm against the scoring output schema.
scoringDF = spark.sql("select * from lending_club_loans_predictions")

# The MLOps channel configuration is kept in a Databricks secret scope rather
# than in the notebook.
channelConfig = dbutils.secrets.get(scope="AzureDRdemo", key="mlopschannelconfig")

# `predictionTime` is produced by the scoring cell, which therefore has to run
# before this one.
spark._jvm.com.datarobot.mlops.spark.MLOpsSparkUtils.reportPredictions(
                        scoringDF._jdf, # scoring data with predictions
                        "5ec3313de71c4404eef2d642", # external DeploymentId
                        "5ec33139f688223b1a84ed78", # external ModelId
                        channelConfig, # MLOps channel configuration
                        float(predictionTime), # actual scoring time
                        ["PREDICTION"], # target columns
                        "id" # AssociationId
                       )

In [3]:
"""
Report actuals with MLOps
"""
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession, DataFrame, SQLContext, DataFrame
from py4j.java_gateway import java_import
from pyspark.sql.types import *
from pyspark.sql.functions import udf


spark = SparkSession(SparkContext.getOrCreate())
# `SparkSession._wrapped` is a private attribute that is absent on PySpark 3.3+;
# fall back to the session so this line cannot abort the cell there.
sqlContext = getattr(spark, "_wrapped", spark)
java_import(spark._jvm, 'com.datarobot.mlops.spark.MLOpsSparkUtils')

# The MLOps channel configuration is kept in a Databricks secret scope rather
# than in the notebook.
channelConfig = dbutils.secrets.get(scope="AzureDRdemo", key="mlopschannelconfig")

# Shape the actuals into the three columns selected below: associationId,
# actualValue and timestamp (left null here).
# NOTE(review): `loan_amnt` is used as the actual value -- confirm it is the
# deployed model's target.
actualsDF = spark.sql("select id as associationId, loan_amnt as actualValue, null as timestamp  from 10k_lending_club_loans_with_id_csv as actuals")

spark._jvm.com.datarobot.mlops.spark.MLOpsSparkUtils.reportActuals(actualsDF._jdf, # actuals keyed by associationId
                                                          "5ec3313de71c4404eef2d642", # external DeploymentId
                                                          "5ec33139f688223b1a84ed78", # external ModelId
                                                          channelConfig # MLOps channel configuration
                                                         )