#### Imports 

In [None]:
from sagemaker import get_execution_role
from time import gmtime, strftime
import pandas as pd
import sagemaker
import boto3
import time

#### Essentials

In [None]:
# Take the bucket and the region from the same SageMaker session so that the
# region-pinned boto3 client created later targets the region this session
# (and therefore its default bucket) belongs to, instead of assuming us-east-1.
_default_session = sagemaker.Session()
bucket = _default_session.default_bucket()
prefix = 'loan-default-prediction'  # key prefix shared by the batch input and output locations
region = _default_session.boto_region_name

In [None]:
# S3 URI prefix for the batch transform input, built from `bucket` and `prefix`.
batch_input = f's3://{bucket}/{prefix}/batch_input/'
# Bare expression: the notebook echoes the resolved URI as the cell output.
batch_input

In [None]:
# S3 URI prefix for the batch transform output, built from `bucket` and `prefix`.
batch_output = f's3://{bucket}/{prefix}/batch_output/'
# Bare expression: the notebook echoes the resolved URI as the cell output.
batch_output

In [None]:
# UTC timestamp (second resolution) used as a suffix for resource names.
timestamp_format = '%Y-%m-%d-%H-%M-%S'
current_timestamp = time.strftime(timestamp_format, time.gmtime())

In [None]:
# Name of the Autopilot (AutoML) job that the notebook looks up to find the best candidate.
automl_job_name = 'predict-loan-default'  # Copy this from the console
# Both names carry the UTC timestamp suffix, so they differ from one run to the next.
model_name = f'autopilot-best-model-{current_timestamp}'
transform_job_name = f'autopilot-batch-job-{current_timestamp}'

In [None]:
# SageMaker SDK session and the IAM role this notebook executes under.
sagemaker_session = sagemaker.Session()
sagemaker_execution_role = get_execution_role()

# Plain boto3 session plus the low-level service clients; only the SageMaker
# client is pinned to `region`, the S3 client uses the ambient configuration.
session = boto3.Session()
s3_client = boto3.client("s3")
sagemaker_client = boto3.client("sagemaker", region_name=region)

#### Copy batch input data from local to S3

In [None]:
# Shell escape: copy the local loans_unlabeled.csv file to the batch input location.
# IPython substitutes the Python variable `batch_input` for {batch_input} before running the command.
!aws s3 cp ./data/train/loans_unlabeled.csv {batch_input}

#### Get the best model using Autopilot job name

In [None]:
# Look up the Autopilot job and pull out its best candidate.
automl_job_description = sagemaker_client.describe_auto_ml_job(
    AutoMLJobName=automl_job_name,
)
best_candidate = automl_job_description["BestCandidate"]
best_candidate_name = best_candidate["CandidateName"]

# Report which candidate was selected and the objective metric it reached.
print(f"CandidateName: {best_candidate_name}")
print(f'FinalAutoMLJobObjectiveMetricName: {best_candidate["FinalAutoMLJobObjectiveMetric"]["MetricName"]}')
print(f'FinalAutoMLJobObjectiveMetricValue: {best_candidate["FinalAutoMLJobObjectiveMetric"]["Value"]}')

In [None]:
# Register the best candidate's inference containers as a SageMaker model
# that runs under the notebook's execution role.
create_model_request = {
    "ModelName": model_name,
    "Containers": best_candidate["InferenceContainers"],
    "ExecutionRoleArn": sagemaker_execution_role,
}
model = sagemaker_client.create_model(**create_model_request)

print(f'Model ARN corresponding to the best candidate is : {model["ModelArn"]}')

#### Create Batch Transform job 

In [None]:
# Input: everything under the batch input prefix, read as uncompressed CSV
# and split on line boundaries.
s3_data_source = {"S3DataType": "S3Prefix", "S3Uri": batch_input}
transform_input = {
    "DataSource": {"S3DataSource": s3_data_source},
    "ContentType": "text/csv",
    "CompressionType": "None",
    "SplitType": "Line",
}

# Output location and the compute used for the job.
transform_output = {"S3OutputPath": batch_output}
transform_resources = {"InstanceCount": 1, "InstanceType": "ml.m5.4xlarge"}

transform_job_request = {
    "TransformJobName": transform_job_name,
    "ModelName": model_name,
    "TransformInput": transform_input,
    "TransformOutput": transform_output,
    "TransformResources": transform_resources,
}

# Left as the final bare expression so the notebook displays the API response.
sagemaker_client.create_transform_job(**transform_job_request)

#### Check the status of the running job

In [None]:
# Poll the transform job until it reaches a terminal state, printing the
# status after every poll.
print("[JobStatus]\n")

terminal_states = ("Failed", "Completed", "Stopped")
poll_interval_seconds = 30

while True:
    describe_response = sagemaker_client.describe_transform_job(TransformJobName=transform_job_name)
    job_run_status = describe_response["TransformJobStatus"]
    print(job_run_status)
    if job_run_status in terminal_states:
        break
    # Sleep only while the job is still running, so the cell returns as soon
    # as a terminal status is seen instead of waiting one more interval.
    time.sleep(poll_interval_seconds)

# Surface the service-side explanation; otherwise a failure only shows up
# later as a missing output object.
if job_run_status == "Failed":
    print(f"FailureReason: {describe_response.get('FailureReason', 'not reported')}")

#### Download the output of the batch transform job from S3 to local

In [None]:
# Handle on the session's default bucket, where the transform job wrote its output.
s3 = boto3.resource("s3")
inference_results_bucket = s3.Bucket(sagemaker_session.default_bucket())

# Source object key (the uploaded input file name with the ".out" suffix)
# and the local destination path.
s3_output_key = f"{prefix}/batch_output/loans_unlabeled.csv.out"
local_inference_results_path = "./data/train/inference_results.csv"

inference_results_bucket.download_file(
    Key=s3_output_key,
    Filename=local_inference_results_path,
)

#### Inspect the results

In [None]:
# Cap how many rows pandas renders when the frame is displayed below.
pd.set_option("display.max_rows", 10)

# NOTE(review): read_csv treats the first line as a header by default; if the
# downloaded file has no header row, its first record becomes the column
# name. Confirm against the actual transform output.
data = pd.read_csv(local_inference_results_path, sep=";")

# Bare expression: the notebook renders the DataFrame as the cell output.
data