# Checking Pre-Requisites From Previous Notebook

In [38]:
%store -r autopilot_train_nans_s3_uri

In [39]:
try:
    autopilot_train_s3_uri
    print("[OK]")
except NameError:
    print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
    print("[ERROR] PLEASE RUN THE PREVIOUS 01_PREPARE_DATASET_AUTOPILOT NOTEBOOK.")
    print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

[OK]


In [40]:
print(autopilot_train_nans_s3_uri)

s3://sagemaker-us-east-1-478947633708/data_nans/traindataautopilot_nans.csv


In [41]:
if not autopilot_train_s3_uri:
    print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
    print("[ERROR] PLEASE RUN THE PREVIOUS 01_PREPARE_DATASET_AUTOPILOT NOTEBOOK.")
    print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
else:
    print("[OK]")

[OK]


In [42]:
import boto3
import sagemaker
import pandas as pd
import json

sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

sm = boto3.Session().client(service_name="sagemaker", region_name=region)

# Training Data

In [43]:
print(autopilot_train_nans_s3_uri)

s3://sagemaker-us-east-1-478947633708/data_nans/traindataautopilot_nans.csv


In [44]:
!aws s3 ls $autopilot_train_nans_s3_uri

2023-04-04 00:14:55       6218 traindataautopilot_nans.csv


## See our prepared training data which we use as input for Autopilot

In [45]:
!aws s3 cp $autopilot_train_nans_s3_uri ./tmp/

download: s3://sagemaker-us-east-1-478947633708/data_nans/traindataautopilot_nans.csv to tmp/traindataautopilot_nans.csv


In [46]:
import csv

df = pd.read_csv("tmp/traindataautopilot_nans.csv")
df.head()

Unnamed: 0,Date,num_stays,TotalAirlineTripstoDC,TotalAirlinePassengerstoDC,TotalAmericanTravelers,PercentofAmericanswhoTraveled,TotalTripsbyAmericans,income_total
0,2009-05-01,2,,,,,,
1,2009-06-01,2,,,,,,
2,2009-08-01,1,,,,,,
3,2009-09-01,1,,,,,,
4,2009-10-01,2,,,,,,


# Setup the S3 Location for the Autopilot-Generated Assets 
This include Jupyter Notebooks (Analysis), Python Scripts (Feature Engineering), and Trained Models.

In [47]:
prefix_model_output = "models/autopilot"

model_output_s3_uri = "s3://{}/{}".format(bucket, prefix_model_output)

print(model_output_s3_uri)

s3://sagemaker-us-east-1-478947633708/models/autopilot


In [48]:
max_candidates = 3

job_config = {
    "CompletionCriteria": {
        "MaxRuntimePerTrainingJobInSeconds": 900,
        "MaxCandidates": max_candidates,
        "MaxAutoMLJobRuntimeInSeconds": 5400,
    },
}

input_data_config = [
    {
        "DataSource": {"S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": "{}".format(autopilot_train_nans_s3_uri)}},
        "TargetAttributeName": "TotalAirlineTripstoDC",
    }
]

output_data_config = {"S3OutputPath": "{}".format(model_output_s3_uri)}

# Check For Existing Autopilot Jobs

In [49]:
existing_jobs_response = sm.list_auto_ml_jobs()

In [50]:
num_existing_jobs = 0
running_jobs = 0

if "AutoMLJobSummaries" in existing_jobs_response.keys():
    job_list = existing_jobs_response["AutoMLJobSummaries"]
    num_existing_jobs = len(job_list)
    # print('[INFO] You already created {} Autopilot job(s) in this account.'.format(num_existing_jobs))
    for j in job_list:
        if "AutoMLJobStatus" in j.keys():
            if j["AutoMLJobStatus"] == "InProgress":
                running_jobs = running_jobs + 1
    print("[INFO] You have {} Autopilot job(s) currently running << Should be 0 jobs.".format(running_jobs))
else:
    print("[OK] Please continue.")

[INFO] You have 0 Autopilot job(s) currently running << Should be 0 jobs.


# Launch the SageMaker Autopilot Job

In [51]:
from time import gmtime, strftime, sleep

In [52]:
%store -r auto_ml_job_name

try:
    auto_ml_job_name
except NameError:
    timestamp_suffix = strftime("%d-%H-%M-%S", gmtime())
    auto_ml_job_name = "automl-dm-" + timestamp_suffix
    print("Created AutoMLJobName: " + auto_ml_job_name)

In [53]:
print(auto_ml_job_name)

automl-dm-03-01-52-28


In [54]:
%store auto_ml_job_name

Stored 'auto_ml_job_name' (str)


In [55]:
sm.create_auto_ml_job(
            AutoMLJobName=auto_ml_job_name,
            InputDataConfig=input_data_config,
            OutputDataConfig=output_data_config,
            AutoMLJobConfig=job_config,
            RoleArn=role,
)

ClientError: An error occurred (ValidationException) when calling the CreateAutoMLJob operation: Dataset is not large enough: expected minimum number of rows is 500 but only 157 were found.

# Release Resources

In [None]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>

In [None]:
%%javascript

try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp
}