In [1]:
import time
import os
import numpy as np
import pandas as pd
import csv
import pytz
import datetime
import argparse

from datarobot.mlops.mlops import MLOps
from datarobot.mlops.connected.client import MLOpsClient
from datarobot.mlops.constants import Constants
from sklearn.ensemble import RandomForestClassifier


In [2]:
# Load the training dataset, split it into train/test partitions and fit the model.
split_ratio = 0.8            # fraction of rows used for training
prediction_threshold = 0.5   # probability cut-off used later when labelling predictions

dataset_filename = "datasets/mlops-example-surgical-dataset.csv"

df = pd.read_csv(dataset_filename)

columns = list(df.columns)

# Shuffle the DataFrame itself (not just a NumPy copy of it) so that the array-based
# partitions and the DataFrame-based partition below describe the SAME rows.
# Previously only the array was shuffled, so `test_df` held different rows than
# `test_data`, and the features reported later did not match the predictions.
# `df` is left untouched; the global NumPy RNG is still the source of randomness.
shuffled_df = df.iloc[np.random.permutation(len(df))].reset_index(drop=True)
arr = shuffled_df.to_numpy()

train_data_len = int(arr.shape[0] * split_ratio)

# The last column is treated as the label; every other column is a feature.
train_data = arr[:train_data_len, :-1]
label = arr[:train_data_len, -1]
test_data = arr[train_data_len:, :-1]
# Row i of test_df corresponds to row i of test_data.
test_df = shuffled_df.iloc[train_data_len:]

# train the model
clf = RandomForestClassifier(n_estimators=10, max_depth=2, random_state=0)
clf.fit(train_data, label)


RandomForestClassifier(max_depth=2, n_estimators=10, random_state=0)

In [3]:
# Make predictions on the held-out rows and time the call.
# perf_counter() is a monotonic, high-resolution clock, so the measured interval cannot
# be distorted by system clock adjustments the way time.time() can. Only the difference
# (end_time - start_time), in seconds, is meaningful.
start_time = time.perf_counter()
predictions_array = clf.predict_proba(test_data)
end_time = time.perf_counter()

In [None]:
# Register the model with DataRobot MLOps and create a deployment for it.
# Run this cell only once, or clean up afterwards: every run creates another deployment.

# Connection settings are taken from the environment (KeyError if either is missing).
endpoint = os.environ['MLOPS_SERVICE_URL']
token = os.environ['MLOPS_API_TOKEN']
DEPLOYMENT_NAME = "API Example Classification Deployment"

mlops_client = MLOpsClient(endpoint, token)

# Upload the training data first so its catalog id can be placed in the model configuration.
dataset_id = mlops_client.upload_dataset(dataset_filename)

model_info = {
    "name": "API Example Classification",
    "modelDescription": {
        "description": "API Example binary Classifier",
        "buildEnvironmentType": "Python",
        "modelName": "API Example Classifier",
    },
    "target": {
        "type": "Binary",
        "name": "complication",
        # NOTE(review): sent as the string ".5" while the notebook's own
        # prediction_threshold is the float 0.5 -- confirm the API accepts a string here.
        "predictionThreshold": ".5",
        "classNames": ["1", "0"],
    },
    "datasets": {"trainingDataCatalogId": dataset_id},
}

# Create the model package, then read it back to obtain the model id.
model_pkg_id = mlops_client.create_model_package(model_info)
model_pkg = mlops_client.get_model_package(model_pkg_id)
model_id = model_pkg["modelId"]

# Deploy the model package under DEPLOYMENT_NAME.
deployment_id = mlops_client.deploy_model_package(model_pkg["id"], DEPLOYMENT_NAME)

# Turn on target and feature drift tracking for the new deployment.
mlops_client.update_deployment_settings(
    deployment_id,
    target_drift=True,
    feature_drift=True,
)
# The fetched settings are discarded; presumably a check that they can be read back -- confirm.
_ = mlops_client.get_deployment_settings(deployment_id)

print("DEPLOYMENT_ID=%s, MODEL_ID=%s" % (deployment_id, model_id))

# Upper-case aliases used by the cells that follow.
DEPLOYMENT_ID, MODEL_ID = deployment_id, model_id

In [None]:
# Initialize the MLOps reporting library for this deployment, spooling through Kafka.
# The topic and broker address default to the values this notebook has always used, but
# can be overridden through the environment so the address is not baked into the notebook.
kafka_topic_name = os.environ.get('MLOPS_KAFKA_TOPIC_NAME', 'mlops-agent')
kafka_bootstrap_servers = os.environ.get('MLOPS_KAFKA_BOOTSTRAP_SERVERS', '52.137.84.88:9092')

m = (
    MLOps()
    .set_deployment_id(DEPLOYMENT_ID)
    .set_kafka_spooler(topic_name=kafka_topic_name,
                       bootstrap_servers=kafka_bootstrap_servers)
    .init()
)
# Alternative (disabled): spool to the local filesystem instead of Kafka.
#m = MLOps().set_filesystem_spooler('//tmp').init()




In [None]:
# Prepare the data that will be reported to DR.

# The target is the last column of the dataset.
target_column_name = columns[-1]
orig_labels = test_df[target_column_name].tolist()

# Random forest classifier from scikit-learn can return a single probability value
# instead of 2 values. In that case the other one is inferred as (1 - p) before
# reporting, because 'report_predictions_data' expects a probability for each class.
reporting_predictions = [
    row + [1 - row[0]] if len(row) == 1 else row
    for row in predictions_array.tolist()
]

# Label each prediction by comparing its first probability with the threshold.
target_values = [
    "0.0" if row[0] < prediction_threshold else "1.0"
    for row in reporting_predictions
]

# Copy of the test rows with the target column replaced by the predicted labels.
feature_df = test_df.copy()
feature_df[target_column_name] = target_values




In [None]:
# Report deployment statistics and prediction data to DataRobot.
num_predictions = predictions_array.shape[0]
elapsed_ms = (end_time - start_time) * 1000  # elapsed seconds scaled by 1000
m.report_deployment_stats(num_predictions, elapsed_ms)

# MLOPS: report the test features together with their per-class probabilities.
# No association ids are passed in this call.
# NOTE(review): test_df (which still contains the original target column) is reported,
# while feature_df prepared earlier is never used -- confirm which one is intended.
m.report_predictions_data(features_df=test_df, predictions=reporting_predictions)


In [None]:
# Shut down the MLOps reporting library instance `m` once all reporting is done.
# NOTE(review): presumably this flushes anything still buffered in the spooler --
# confirm against the library documentation.
m.shutdown()