In [None]:
# Inputs: Define AWS region and resource identifiers.
import time, boto3
# --- User-supplied inputs ---------------------------------------------------
# Region comes from the default boto3 session.
region = boto3.Session().region_name
# One bucket is used for both the training input and all job output.
bucket = "your-s3-bucket"
# Object key of the training CSV; its first row is expected to be a header.
train_key = "data/your-training.csv"
# Header name of the column the model should predict.
target_col = "target"
# Learning task: 'BinaryClassification', 'MulticlassClassification' or 'Regression'.
problem_type = "BinaryClassification"
# Instance type that will back the inference endpoint.
instance_type = "ml.m5.large"

# --- Derived AutoML V2 job settings -------------------------------------------
# A Unix-timestamp suffix keeps the job name unique from run to run.
auto_ml_job_name = "autopilot-{}".format(int(time.time()))
s3_train_path = "s3://" + bucket + "/" + train_key
s3_output_path = "s3://{}/autopilot-output/{}".format(bucket, auto_ml_job_name)

# Build the optional feature whitelist: every CSV column except the target.
# The list is uploaded to S3 as a JSON document and later handed to the job
# through FeatureSpecificationS3Uri.
import io
import csv
import json

# Stream the training object and parse only its first row (the header).
s3 = boto3.client("s3")
obj = s3.get_object(Bucket=bucket, Key=train_key)
# utf-8-sig drops a leading byte-order mark so it cannot leak into the first
# column name; newline="" is what the csv module expects from its input.
# NOTE: obj["Body"] is a one-shot stream; it is consumed and closed after this
# block, so any later read of the file must fetch the object again.
with io.TextIOWrapper(obj["Body"], encoding="utf-8-sig", newline="") as header_stream:
    header = next(csv.reader(header_stream), None)
if not header:
    raise ValueError(f"Training CSV s3://{bucket}/{train_key} is empty; expected a header row")
feature_columns = [col for col in header if col != target_col]

# Upload the FeatureSpecification document. It has to be real JSON: str() on a
# dict produces Python repr (single-quoted strings), which JSON parsers reject.
feature_spec = {"FeatureAttributeNames": feature_columns}
feat_spec_key = f"{auto_ml_job_name}/features.json"
s3.put_object(
    Bucket=bucket,
    Key=feat_spec_key,
    Body=json.dumps(feature_spec).encode("utf-8"),
    ContentType="application/json",
)
feature_spec_uri = f"s3://{bucket}/{feat_spec_key}"

# Launch SageMaker Autopilot (AutoML V2) job
sm = boto3.client("sagemaker", region_name=region)

# RoleArn must be an IAM *role* ARN (arn:...:iam::<account>:role/...). The STS
# caller ARN is either a user ARN or an assumed-role session ARN
# (arn:...:sts::<account>:assumed-role/<role-name>/<session>), so it cannot be
# passed as-is; map the session back to the role it was assumed from.
caller_arn = boto3.client("sts").get_caller_identity()["Arn"]
if ":assumed-role/" not in caller_arn:
    raise RuntimeError(
        f"Caller {caller_arn} is not an assumed role; run this under an IAM role "
        "that SageMaker can assume (for example a notebook execution role)"
    )
assumed_role_name = caller_arn.split("/")[1]
iam = boto3.client("iam")
try:
    # get_role returns the complete ARN, including any path (e.g. service-role/).
    role_arn = iam.get_role(RoleName=assumed_role_name)["Role"]["Arn"]
except iam.exceptions.ClientError:
    # Typically missing iam:GetRole permission. Rebuild the ARN from the caller
    # ARN instead; this is only correct when the role has no path.
    arn_parts = caller_arn.split(":")
    role_arn = f"arn:{arn_parts[1]}:iam::{arn_parts[4]}:role/{assumed_role_name}"
    print(f"Could not look up role {assumed_role_name}; assuming ARN {role_arn}")

sm.create_auto_ml_job_v2(
    AutoMLJobName=auto_ml_job_name,
    AutoMLJobInputDataConfig=[{
        "ChannelType": "training",
        "ContentType": "text/csv;header=present",
        "DataSource": {"S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": s3_train_path}}
    }],
    OutputDataConfig={"S3OutputPath": s3_output_path},
    RoleArn=role_arn,
    AutoMLProblemTypeConfig={
        "TabularJobConfig": {
            "TargetAttributeName": target_col,
            "ProblemType": problem_type,
            # Let Mode default to AUTO; you could set "ENSEMBLING" or "HYPERPARAMETER_TUNING" if desired.
            "FeatureSpecificationS3Uri": feature_spec_uri,
            "CompletionCriteria": {"MaxCandidates": 3}  # limit to 3 model candidates for speed/cost
        }
    }
)
print(f"Started AutoML job: {auto_ml_job_name}")

# Block until the job reaches a terminal state, printing its status on every
# poll. How long this takes depends on data size and MaxCandidates.
terminal_states = ("Completed", "Failed", "Stopped")
describe_response = None
status = None
while status not in terminal_states:
    if describe_response is not None:
        time.sleep(60)  # wait between polls, but never before the first one
    describe_response = sm.describe_auto_ml_job_v2(AutoMLJobName=auto_ml_job_name)
    status = describe_response["AutoMLJobStatus"]
    sec_status = describe_response.get("AutoMLJobSecondaryStatus")
    print(f"Status: {status} - {sec_status}")

# Abort unless the job finished successfully.
if status != "Completed":
    raise RuntimeError(f"AutoML job did not complete successfully (status: {status})")

# Register the job's best candidate as a SageMaker model.
# The describe response carries the best candidate together with the container
# definitions needed to serve it.
best_candidate = describe_response.get("BestCandidate")
if not best_candidate:
    raise RuntimeError(f"AutoML job {auto_ml_job_name} reported no best candidate")
inf_containers = best_candidate["InferenceContainers"]
model_name = f"{auto_ml_job_name}-model"
sm.create_model(
    ModelName=model_name,
    Containers=inf_containers,  # use every container of the best candidate, in order
    # Reuse the role the AutoML job itself ran with. The previous
    # iam.get_user()["User"].arn was attribute access on a dict (AttributeError)
    # and would at best have produced a user ARN, not a role ARN.
    ExecutionRoleArn=describe_response["RoleArn"],
)

# Create the endpoint configuration for this run: a single production variant
# named "AllTraffic" that serves the model on one instance of the chosen type.
endpoint_config_name = auto_ml_job_name + "-config"
production_variant = {
    "VariantName": "AllTraffic",
    "ModelName": model_name,
    "InstanceType": instance_type,
    "InitialInstanceCount": 1,
}
sm.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
)
endpoint_name = "autopilot-poc-endpoint"  # stable endpoint name for this PoC
# Point the endpoint at the new config: update it if it already exists,
# otherwise create it. Only the existence probe sits inside the try, so a
# failing update is reported as such instead of triggering a create.
try:
    sm.describe_endpoint(EndpointName=endpoint_name)
except sm.exceptions.ClientError as err:
    # A missing endpoint is reported as a ValidationException; anything else
    # (throttling, access denied, ...) is a real error and must propagate.
    if err.response["Error"]["Code"] != "ValidationException":
        raise
    print(f"Creating new endpoint: {endpoint_name}")
    sm.create_endpoint(EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name)
else:
    print(f"Updating existing endpoint: {endpoint_name}")
    sm.update_endpoint(EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name)

# Wait for endpoint to be in service
waiter = sm.get_waiter("endpoint_in_service")
waiter.wait(EndpointName=endpoint_name)
print(f"Endpoint {endpoint_name} is InService")

# Invoke the endpoint with a sample from the CSV (excluding the target column)
import base64
runtime = boto3.client("sagemaker-runtime", region_name=region)
# Use the first data row of the training CSV (minus the target) as a test input.
# The object is fetched again because the response body read earlier for the
# header is a one-shot stream that has already been consumed.
sample_obj = s3.get_object(Bucket=bucket, Key=train_key)
with io.TextIOWrapper(sample_obj["Body"], encoding="utf-8-sig", newline="") as sample_stream:
    sample_reader = csv.reader(sample_stream)  # handles quoted fields with embedded commas
    sample_header = next(sample_reader, None) or []
    sample = next(sample_reader, None)

if sample:
    # Remove the target value from the sample if present
    if target_col in sample_header:
        target_index = sample_header.index(target_col)
        if target_index < len(sample):  # tolerate a row shorter than the header
            sample.pop(target_index)
    # Serialise through csv.writer so fields needing quotes stay one field.
    payload_buffer = io.StringIO()
    csv.writer(payload_buffer, lineterminator="").writerow(sample)
    payload = payload_buffer.getvalue()
else:
    payload = ""  # fallback if no data
if payload:
    response = runtime.invoke_endpoint(EndpointName=endpoint_name,
                                       ContentType="text/csv",
                                       Body=payload)
    prediction = response["Body"].read().decode()
    print("Sample prediction:", prediction)
