# EmOpti Workshop - AutoPilot Batch Transform

This notebook runs the same test data through a trained model that was created by AutoPilot, using a SageMaker *Batch Transform* job, and then compares the predictions with the known labels.

The `Python 3 (Data Science)` kernel works well with this notebook.

In [None]:
import sagemaker
import boto3
from sagemaker import get_execution_role
from time import gmtime, strftime, sleep

# A single boto3 session supplies both the region name and the low-level
# SageMaker client used by the cells below.
boto_session = boto3.Session()
region = boto_session.region_name

# High-level SageMaker SDK session.
session = sagemaker.Session()

# The SDK default bucket is looked up first, but it is immediately replaced by
# the hard-coded bucket; later cells build their S3 paths from this value.
s3bucket = session.default_bucket()
s3bucket = 'am-tmp2'
s3prefix = "emopti"

# IAM role handed to SageMaker when the Model is created.
role = get_execution_role()
sm = boto_session.client("sagemaker", region_name=region)

### Model Artifact
Set `model_artifact` in the cell below to the S3 URL of the `model.tar.gz` artifact of a Model created by AutoPilot.

In [None]:
# S3 URL of the trained model archive (model.tar.gz); it is passed to
# create_model as the container's ModelDataUrl. Replace it with your own artifact.
model_artifact = 's3://am-tmp2/emopti2/emopti2-exp1/tuning/emopti2-ex-dpp2-xgb/emopti2-exp1KTbVpBNcXGZQGbIuIKlA-102-17f0ab7a/output/model.tar.gz'

### Create the Model 
Creating a Model makes it easy to create a *Batch Transform* job using the Model specified above

In [None]:
from sagemaker import image_uris

# Second-resolution timestamp: SageMaker model names must be unique, so a
# minute-resolution name makes a quick re-run of this cell fail.
model_name = f'autopilot-model-{strftime("%Y%m%d-%H%M%S", gmtime())}'

# image_uris.retrieve is the SDK v2 replacement for the deprecated
# get_image_uri helper; reuse the region resolved in the setup cell instead of
# building another boto3 session.
container = image_uris.retrieve(framework="xgboost", region=region, version="1.3-1")

# Register a Model that pairs the XGBoost inference image with the artifact
# given in `model_artifact`.
model = sm.create_model(
    ModelName=model_name,
    PrimaryContainer={
        'Image': container,
        'ModelDataUrl': model_artifact,
        'Environment': {},
    },
    ExecutionRoleArn=role,
)

# Show the CreateModel response.
model

Now that our Model is created, let's create our *Batch Transform* job

In [None]:
# Job name carries a UTC timestamp; the wait cell looks the job up by this name.
job_timestamp = strftime("%Y%m%d-%H%M", gmtime())
transform_job_name = f'autopilot-transform-{job_timestamp}'

# Where the job reads the test set from and where it writes predictions to.
test_data_uri = f's3://{s3bucket}/{s3prefix}/automl/data/test.csv'
inference_output_uri = f"s3://{s3bucket}/{s3prefix}/autopilot/results/inference"

# CSV input, uncompressed, split on line boundaries.
transform_input = {
    "DataSource": {
        "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": test_data_uri},
    },
    "ContentType": "text/csv",
    "CompressionType": "None",
    "SplitType": "Line",
}
transform_output = {"S3OutputPath": inference_output_uri}
transform_resources = {"InstanceType": "ml.m5.4xlarge", "InstanceCount": 1}

# Assemble the full request, then submit it; the response is the cell output.
transform_request = {
    "TransformJobName": transform_job_name,
    "ModelName": model_name,
    "TransformInput": transform_input,
    "TransformOutput": transform_output,
    "TransformResources": transform_resources,
}
sm.create_transform_job(**transform_request)

Wait for the *Batch Transform* job to complete

In [None]:
# Poll the transform job until it reaches a terminal state.
#
# The status is fetched first and the sleep happens only while the job is
# still running, so there is no duplicate initial describe call and no extra
# wait after the job has already finished.
POLL_SECONDS = 30
TERMINAL_STATES = ("Failed", "Completed", "Stopped")

print("JobStatus")
print("----------")

while True:
    describe_response = sm.describe_transform_job(TransformJobName=transform_job_name)
    job_run_status = describe_response["TransformJobStatus"]
    print(job_run_status)
    if job_run_status in TERMINAL_STATES:
        break
    sleep(POLL_SECONDS)

# Surface the reason right away instead of leaving it buried in the response.
if job_run_status == "Failed":
    print(f"FailureReason: {describe_response.get('FailureReason', 'not reported')}")

In [None]:
# Show the most recent describe_transform_job response captured by the wait loop.
describe_response

Download the results file from our *Batch Transform* job

In [None]:
import pandas as pd

pd.set_option("display.max_rows", 20)  # Keep the output on one page

# The results object sits under the transform job's output prefix and is named
# after the input file with ".out" appended.
local_inference_results_path = "autopilot-inference_results.csv"
s3_output_key = f"{s3prefix}/autopilot/results/inference/test.csv.out"

# Copy the results object from S3 to the local working directory.
s3 = boto3.resource("s3")
s3.Bucket(s3bucket).download_file(
    Key=s3_output_key,
    Filename=local_inference_results_path,
)

# Parsed with ';' as the separator and the first line taken as the header row.
df_preds = pd.read_csv(local_inference_results_path, sep=";")
df_preds

Get the Labels for our Test Data and show only the ADMIT rows so we can see the count of ADMITS

In [None]:
from sklearn.metrics import confusion_matrix

# Load the known labels for the test set.
df_test = pd.read_csv('data/test_labels.csv')

# Display only the rows whose DISCHARGE value is ADMIT; the row count shown
# under the table is the number of ADMITs.
is_admit = df_test['DISCHARGE'] == 'ADMIT'
df_test[is_admit]


### Confusion Matrix

In [None]:
# The original cell referenced `data`, which is never defined in this notebook
# (NameError on Restart & Run All), and passed the whole df_test frame as y_true.
# Use the label column and the predictions frame loaded in earlier cells.
y_true = df_test['DISCHARGE']

# NOTE(review): assumes the first column of df_preds holds the predicted label,
# that its values use the same labels as DISCHARGE, and that its rows line up
# one-to-one with df_test (df_preds was read with its first line as the header,
# which matches the original `[1:]` slice) -- confirm against the job output.
y_pred = df_preds.iloc[:, 0]

if len(y_true) != len(y_pred):
    raise ValueError(
        f"Label/prediction count mismatch: {len(y_true)} labels vs {len(y_pred)} predictions"
    )

cm = confusion_matrix(y_true, y_pred)
cm

In [None]:
import numpy as np
import seaborn as sns
import matplotlib

# Draw the confusion matrix as an annotated heatmap.
# NOTE(review): the tick labels assume cm rows/columns are ordered
# ADMIT, DISCHARGE -- confirm against the labels passed to confusion_matrix.
class_names = ['ADMIT', 'DISCHARGE']

# fmt='' prints each cell value as-is rather than with seaborn's default
# '.2g' number format.
ax = sns.heatmap(cm, cmap='Blues', annot=True, fmt='')
ax.set_xticklabels(class_names)
ax.set_yticklabels(class_names)
ax.set(ylabel="True Label", xlabel="Predicted Label")

