In [None]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import os

import boto3
import re # python regex module
from sagemaker import get_execution_role
import sagemaker

# SDK 2 serializers and deserializers
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

<h1>XGBoost Cloud Prediction - Iris Classification</h1>
<h4>Invoke SageMaker Prediction Service</h4>

In [None]:
# Attach to an existing real-time endpoint by name.
#DWB# Endpoint name checked from console - matches
endpoint_name = 'xgboost-iris-v1'
predictor = sagemaker.predictor.Predictor(endpoint_name=endpoint_name)

In [None]:
# Serialize request payloads with the SDK's CSVSerializer.
# NOTE(review): presumably the endpoint's container expects CSV input -- confirm.
predictor.serializer = CSVSerializer()

In [None]:
# Test predictive quality against the data in the validation file.
# Column names are supplied explicitly through `names`.
validation_columns = [
    'encoded_class',
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
]
df_all = pd.read_csv('iris_validation.csv', names=validation_columns)

In [None]:
# Preview the first rows of the validation data.
df_all.head()

In [None]:
# Confirm the column names that were assigned when the file was read.
df_all.columns

In [None]:
# The prediction call needs array-like input: either a numpy array
# or a list of values such as [[19,1],[20,1]].
# Only the four feature columns are selected; the label column is left out.
# (`.values` is used because `DataFrame.as_matrix` is gone from current pandas.)
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
arr_test = df_all[feature_columns].values

In [None]:
# Check what kind of object `.values` handed back.
type(arr_test)

In [None]:
# (rows, columns) of the feature array.
arr_test.shape

In [None]:
# Peek at the first five feature rows.
arr_test[:5]

In [None]:
# Smoke test: send only the first two feature rows to the endpoint
# before querying it with the whole validation set.
result = predictor.predict(arr_test[:2])

In [None]:
# Shape of the full feature array again -- note this is `arr_test`,
# not the two-row prediction `result`.
arr_test.shape

In [None]:
#DWB#  The repeated arr_test.shape cell was probably not on purpose;
#DWB#+ try the shape of the prediction `result` instead.
#DWB#+ A failure is reported on stderr instead of stopping the notebook.
try:
    print(result.shape)
except Exception as err:
    # One print call, newline-separated: blank line, the two messages, blank line.
    print(
        '',
        "That didn't work.",
        f"str(e) is: `{str(err)}`",
        '',
        sep='\n',
        file=sys.stderr,
    )
finally:
    print("That tells us what we need to know.")
##endof:  try/except/finally

In [None]:
# Show the raw value that predictor.predict returned.
result

In [None]:
# For a large number of predictions, split the input data into chunks and
# query the prediction service once per chunk.
# np.array_split is convenient because it takes the number of chunks wanted
# and accepts a row count that does not divide evenly.

# The response is split with a regular expression because xgboost 1-2-2
# returns predicted values with inconsistent delimiters (comma, newline
# or both).

# pattern matches one or more characters that are neither a digit nor '.'
pattern = r'[^0-9.]+'

predictions = []
#DWB# Bookkeeping for the shape-consistency check
total_row_count = 0
n_columns_and_count = {}  # column count -> number of chunks with that count

for chunk in np.array_split(arr_test, 10):
    # predict() is expected to hand back raw bytes here (no deserializer is
    # set on the predictor in the cells shown), hence the explicit decode.
    # Chunk-local names are used so the `result` from the earlier two-row
    # test cell is not overwritten with a different kind of object.
    response_text = predictor.predict(chunk).decode("utf-8")
    tokens = re.split(pattern, response_text)
    print(chunk.shape)

    #DWB# <shape-consistency-check>
    chunk_rows, chunk_cols = chunk.shape
    total_row_count += chunk_rows
    n_columns_and_count[chunk_cols] = n_columns_and_count.get(chunk_cols, 0) + 1
    #DWB# </shape-consistency-check>

    # re.split yields empty strings when the response starts or ends with
    # a delimiter; those are skipped.
    predictions += [int(float(token)) for token in tokens if token != ""]

# Every row sent should come back as exactly one prediction; catching a
# mismatch here gives a clearer message than a later length error when the
# predictions are attached to df_all.
assert len(predictions) == total_row_count, (
    f"Expected {total_row_count} predictions but parsed {len(predictions)}"
)

#DWB# It's me from here on out.

print()
print("# Looking at the chunks all together #")
print(f"The total number of rows is: {total_row_count}")
print("For each chunk, I counted the number of columns;")
print("here is the distribution of column counts.")
print(n_columns_and_count)
print()

# Derive the combined shape from the counters instead of hard-coding it,
# so the message stays true if the validation file changes.
if len(n_columns_and_count) == 1:
    (common_col_count,) = n_columns_and_count
    combined_shape = (total_row_count, common_col_count)
    verdict = "matches" if combined_shape == arr_test.shape else "does NOT match"
    print("The shape of all the chunks combined is")
    print(f"{combined_shape}, which {verdict} our original")
    print(f"arr_test, whose shape is {arr_test.shape}.")
else:
    print("The chunks do not all have the same number of columns,")
    print("so they cannot be combined into one rectangular shape.")

In [None]:
# Number of parsed predictions; this should equal the number of rows in arr_test.
len(predictions)

In [None]:
# First five predicted values (integer class codes, decoded to names further down).
predictions[:5]

In [None]:
from sklearn import preprocessing

# Build a label encoder over the three Iris class names so that numeric
# class codes can be turned back into names.
# NOTE(review): this assumes the codes in the data were produced by an
# encoder fitted on these same names -- confirm against the training notebook.
iris_class_names = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
le = preprocessing.LabelEncoder()
le.fit(iris_class_names)

In [None]:
# Decode the known labels: numeric `encoded_class` -> class name.
df_all['class'] = le.inverse_transform(df_all.encoded_class)

In [None]:
# Decode the endpoint's predictions the same way. This relies on
# `predictions` holding one entry per row of df_all, in row order.
df_all['predicted_class']=le.inverse_transform(predictions)

In [None]:
# Preview again; the frame now also carries `class` and `predicted_class`.
df_all.head()

In [None]:
# Cross-tabulate actual classes (rows) against predicted classes (columns).
print('Confusion matrix - Actual versus Predicted')
pd.crosstab(df_all['class'], df_all['predicted_class'])

In [None]:
import sklearn.metrics as metrics

# Per-class summary of actual versus predicted labels. The report is
# printed so that its line breaks render in the cell output.
report_text = metrics.classification_report(df_all['class'], df_all['predicted_class'])
print(report_text)

In [None]:
#DWB#  This second notebook still had no endpoint-deletion code,
#DWB#+ so I am adding some here, with the same comment Chandra
#DWB#+ wrote for the previous such code:
# Delete Endpoint to prevent unnecessary charges
predictor.delete_endpoint()

In [None]:
#  I checked the list of endpoints in the AWS Console (SageMaker),
#+ and the endpoint that was there is now gone.