In [23]:
import requests
import pandas as pd
import numpy as np
import datetime
from sklearn.datasets import load_iris
import os
import datetime
import pickle
import uuid
from domino.data_sources import DataSourceClient

# UPDATE: replace these values with the settings for your own model.
# None of them are read by the rest of this cell.
domino_url = "demo2.dominodatalab.com"
DMM_model_id = "6628103c965e21e5b0d56b29"
external_datasource = "demo-bucket"
DMM_datasource_name = "se-demo-bucket"
datasourceType = "s3"

# Date parts used to label the files written below.
# NOTE(review): `day` is the current day-of-month minus one, so it is 0 on the
# first of a month. The upload cell derives its file names with the same
# arithmetic, so any change here has to be mirrored there.
date = datetime.datetime.today()
month, day, year = date.month, date.day - 1, date.year

# Load data for scoring: the iris feature matrix plus the numeric class label.
data = load_iris()
df = pd.DataFrame(data = data['data'], columns = data.feature_names)
df['variety'] = data['target']

# Only the feature columns are scored; copy so `df` keeps the unmodified values.
scoring_data = df[data.feature_names].copy()

# Jitter the scoring data: add independent Gaussian noise (standard normal
# divided by 25) to every cell, then keep each value at or above 0.1.
# The noise is drawn once per cell rather than once per column, so rows vary
# independently instead of a whole column shifting by the same offset.
# No random seed is set, so each run produces different scoring data.
jitter = np.random.normal(size=scoring_data.shape) / 25
scoring_data = (scoring_data + jitter).clip(lower=0.1)

# Load the "external" model
# NOTE: unpickling can execute arbitrary code, so only load model files that
# come from a trusted source.
file_name = "/mnt/code/models/xgb_iris.pkl"
# The context manager closes the file handle as soon as the model is loaded.
with open(file_name, "rb") as model_file:
    model = pickle.load(model_file)

# Get model predictions (numeric)
# `scoring_data` is rebound from a DataFrame to a plain list of rows here; the
# scoring dataset further down is rebuilt from this list.
scoring_data = scoring_data.values.tolist()
model_predictions = model.predict(scoring_data)

# Create the scoring dataset for model monitoring.
#
# It holds the data that was scored, the model predictions (as strings), a
# timestamp and an event ID per row for model quality monitoring.
feature_columns = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)' ]
predictions = pd.DataFrame(scoring_data, columns=feature_columns)

# The numeric predictions are attached only long enough to look up the class
# name for each row, then removed again.
predictions['predictions'] = model_predictions
predictions['variety'] = [data.target_names[label] for label in predictions['predictions']]
predictions = predictions.drop(columns='predictions')

# Every row gets the same timestamp string and its own random UUID.
scored_at = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
predictions['timestamp'] = scored_at
event_ids = [uuid.uuid4() for _ in range(len(predictions))]
predictions['event_id'] = event_ids

# Save version to Domino Dataset for future reference
scoring_csv_path = '/mnt/data/{}/external_iris_scoring_data_{}_{}_{}.csv'.format(
    os.environ.get('DOMINO_PROJECT_NAME'), month, day, year
)
predictions.to_csv(scoring_csv_path, index=False)

print("Scoring data saved to project's Domino Dataset")

# Create the "dummy" ground truth dataset: one row per event ID, initially
# using the predicted class name as the ground truth label.
ground_truth = pd.DataFrame({
    'event_id': predictions['event_id'],
    'iris_ground_truth': predictions['variety'],
})

# These row positions help find some different iris types in our initial scoring data
end_index = len(predictions)
mid_index = round(end_index / 2)

# Simulate some classification errors. This makes our confusion matrix interesting.
# Each pair is (row position, label to write); they are applied in this order
# to column position 1, which is 'iris_ground_truth'.
simulated_errors = [
    (0, 'virginica'),
    (1, 'versicolor'),
    (mid_index - 1, 'versicolor'),
    (mid_index, 'virginica'),
    (end_index - 2, 'setosa'),
    (end_index - 1, 'setosa'),
]
for row_position, true_label in simulated_errors:
    ground_truth.iloc[row_position, 1] = true_label

# Save the ground truth file next to the scoring data file
ground_truth_csv_path = '/mnt/data/{}/external_iris_ground_truth_{}_{}_{}.csv'.format(
    os.environ.get('DOMINO_PROJECT_NAME'), month, day, year
)
ground_truth.to_csv(ground_truth_csv_path, index=False)

print("Ground truth data saved to project's Domino Dataset")
print("Done!")



Scoring data saved to project's Domino Dataset
Ground truth data saved to project's Domino Dataset
Done!


In [25]:
import requests
import pandas as pd
import numpy as np
import datetime
from sklearn.datasets import load_iris
import os
import datetime
import pickle
import uuid
from domino.data_sources import DataSourceClient

# UPDATE: replace these values with the settings for your own model.
# None of them are read by the rest of this cell.
domino_url = "demo2.dominodatalab.com"
DMM_model_id = "6628103c965e21e5b0d56b29"
external_datasource = "demo-bucket"
DMM_datasource_name = "se-demo-bucket"
datasourceType = "s3"

# Date parts (month, day-of-month minus one, year).
# NOTE(review): `day` is 0 on the first of a month because it is computed as
# the current day-of-month minus one.
date = datetime.datetime.today()
month, day, year = date.month, date.day - 1, date.year

# Load the iris data: `df` holds the feature columns plus the numeric class
# label in 'variety'.
data = load_iris()
df = pd.DataFrame(data['data'], columns=data.feature_names).assign(variety=data['target'])

# The scoring frame is an independent copy of just the feature columns; the
# bare expression below displays it as the cell output.
scoring_data = df.loc[:, data.feature_names].copy()
scoring_data
# predictions_df.to_csv('/mnt/code/data/external_model_scoring_data.csv', index=False)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [22]:
import os
import datetime
import requests
from domino.data_sources import DataSourceClient

# UPDATE: replace these values with the settings for your own model.
# Host and model ID are used to build the model-monitor registration URL.
domino_url = "demo2.dominodatalab.com"
DMM_model_id = "6628103c965e21e5b0d56b29"
# Name of the Domino data source the files are uploaded through.
external_datasource = "demo-bucket"
# Data source name and type sent in the registration payload.
DMM_datasource_name = "se-demo-bucket"
datasourceType = "s3"

# API key sent in the X-Domino-Api-Key header; raises KeyError when the
# MY_API_KEY environment variable is not set.
API_key = os.environ['MY_API_KEY']

# Date parts used to rebuild the file names produced by the scoring cell.
# NOTE(review): `day` is the current day-of-month minus one, so it is 0 on the
# first of a month. The scoring cell names its files with the same arithmetic,
# so the two must stay in step.
date = datetime.datetime.today()
month, day, year = date.month, date.day - 1, date.year
date_parts = (month, day, year)

# Scoring file name for this date label
scoring_file_name = "external_iris_scoring_data_{}_{}_{}.csv".format(*date_parts)

# Ground truth file name for this date label
gt_file_name = "external_iris_ground_truth_{}_{}_{}.csv".format(*date_parts)

# Upload scoring data to DMM data source using a Domino data source (s3 in this example)

# Create a data source client and look up the data source by name
object_store = DataSourceClient().get_datasource("{}".format(external_datasource)) # Update

# Upload the scoring file first, then the ground truth file. Each call passes
# the bare file name followed by the file's local path under this project's
# /mnt/data directory.
for upload_name in (scoring_file_name, gt_file_name):
    local_path = '/mnt/data/{}/{}'.format(os.environ.get('DOMINO_PROJECT_NAME'), upload_name)
    object_store.upload_file(upload_name, local_path)

# Register the uploaded scoring file with the model monitoring API.
#
# This step only updates the file paths, and assumes the external model has already been registered in DMM! See "External_DMM_Quickstart.ipynb"
# NOTE(review): only the scoring (prediction) file is registered here; the
# ground truth registration at the bottom of this cell is still commented out.

print('Registering {} from {} data source in DMM'.format(scoring_file_name, external_datasource))

scoring_data_url = "https://{}/model-monitor/v2/api/model/{}/register-dataset/prediction".format(domino_url, DMM_model_id)

# Set up call headers
headers = {
           'X-Domino-Api-Key': API_key,
           'Content-Type': 'application/json'
          }

# Build the request body as a dict and let requests serialise it, so values
# are always escaped into valid JSON instead of being pasted into a template.
# The scoring file name is used both as the dataset name and as its path.
scoring_data_payload = {
    "datasetDetails": {
        "name": scoring_file_name,
        "datasetType": "file",
        "datasetConfig": {
            "path": scoring_file_name,
            "fileFormat": "csv",
        },
        "datasourceName": DMM_datasource_name,
        "datasourceType": datasourceType,
    }
}

# Make api call
scoring_data_response = requests.put(scoring_data_url, headers=headers, json=scoring_data_payload)

# Print response
print(scoring_data_response.text.encode('utf8'))

# Raise on a 4xx/5xx status so a failed registration does not pass silently;
# the body is printed first so any error details stay visible.
scoring_data_response.raise_for_status()

# print('Registering {} from {} data source in DMM'.format(gt_file_name, external_datasource))

# ground_truth_url = "https://{}/model-monitor/v2/api/model/{}/register-dataset/ground_truth".format(domino_url, DMM_model_id)


Registering external_iris_scoring_data_4_23_2024.csv from demo-bucket data source in DMM
b''
