# In [None]:
# --- Inputs (Studio-friendly) ---
import time, io, csv, json, boto3, sagemaker
from sagemaker import get_execution_role

# SageMaker session context: region and default bucket.
session = sagemaker.Session()
region = session.boto_region_name
bucket = session.default_bucket()  # Studio-managed default bucket

# Dataset layout: CSV with a header row that includes the target column.
train_key = "autopilot-demo/train.csv"
target_col = "target"
feature_columns = ["feature_1", "feature_2", "feature_3"]

# AutoML job settings.
problem_type = "MulticlassClassification"  # or BinaryClassification / Regression
objective = "Accuracy"
mode = "AUTO"  # AUTO | ENSEMBLING | HYPERPARAMETER_TUNING

# Hosting settings.
instance_type = "ml.m5.large"
endpoint_name = "autopilot-poc-endpoint"

# AWS clients (S3, SageMaker control plane, SageMaker runtime), all bound to
# the session's region.
s3, sm, rt = (
    boto3.client(service, region_name=region)
    for service in ("s3", "sagemaker", "sagemaker-runtime")
)
role = get_execution_role()

# Per-run names and S3 locations; the job name carries the current epoch second.
job_name = f"autopilot-{int(time.time())}"
s3_train_path = f"s3://{bucket}/{train_key}"
s3_output_path = f"s3://{bucket}/autopilot-output/{job_name}"
feature_spec_key = f"{job_name}/features.json"

# --- Read header, validate columns & build feature spec (target excluded) ---
# Only the header row is needed. Leaving the `with` block closes the text
# wrapper, which in turn closes the underlying S3 response stream.
obj = s3.get_object(Bucket=bucket, Key=train_key)
with io.TextIOWrapper(obj["Body"], encoding="utf-8", newline="") as header_stream:
    header = next(csv.reader(header_stream), None)

# Explicit raises instead of `assert`: asserts are stripped under `python -O`,
# which would let a bad configuration reach the (slow, billable) AutoML job.
if not header:
    raise ValueError(f"No header row found in s3://{bucket}/{train_key}")
if target_col not in header:
    raise ValueError(f"Target '{target_col}' not found in {header}")
missing_features = [col for col in feature_columns if col not in header]
if missing_features:
    raise ValueError(f"Feature columns {missing_features} not found in {header}")
# The feature specification lists input features only; the target column is
# passed separately to the job and must not appear in this list.
if target_col in feature_columns:
    raise ValueError(f"Target '{target_col}' must not be listed in feature_columns")

# Upload the spec as JSON and keep its S3 URI for the job request.
feature_spec = {"FeatureAttributeNames": feature_columns}
s3.put_object(Bucket=bucket, Key=feature_spec_key, Body=json.dumps(feature_spec).encode("utf-8"))
feature_spec_uri = f"s3://{bucket}/{feature_spec_key}"

# --- Launch AutoML V2 job ---
# The request pieces are assembled as named values first, then sent in one call.

# Single training channel: a CSV (with header) read from the training S3 URI.
training_channel = {
    "ChannelType": "training",
    "ContentType": "text/csv;header=present",
    "DataSource": {
        "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": s3_train_path},
    },
}

# Budget limits: candidate count, per-training-job runtime, overall job runtime.
completion_criteria = {
    "MaxCandidates": 3,
    "MaxRuntimePerTrainingJobInSeconds": 1800,
    "MaxAutoMLJobRuntimeInSeconds": 7200,
}

# Tabular problem definition: problem type, mode, feature spec and target column.
tabular_job_config = {
    "ProblemType": problem_type,
    "Mode": mode,
    "FeatureSpecificationS3Uri": feature_spec_uri,
    "CompletionCriteria": completion_criteria,
    "TargetAttributeName": target_col,
}

sm.create_auto_ml_job_v2(
    AutoMLJobName=job_name,
    AutoMLJobInputDataConfig=[training_channel],
    OutputDataConfig={"S3OutputPath": s3_output_path},
    RoleArn=role,
    AutoMLJobObjective={"MetricName": objective},
    AutoMLProblemTypeConfig={"TabularJobConfig": tabular_job_config},
)
print("Started:", job_name)

# --- Poll with gentle backoff ---
# `sleep` is the wait between polls in seconds: it starts at 30 and grows by
# 10 after every poll, capped at 120.
sleep = 30
while True:
    d = sm.describe_auto_ml_job_v2(AutoMLJobName=job_name)
    st = d["AutoMLJobStatus"]
    sec = d.get("AutoMLJobSecondaryStatus")
    print("Status:", st, "-", sec)
    # Leave the loop once the job reports one of these statuses.
    if st in ("Completed", "Failed", "Stopped"):
        break
    time.sleep(sleep)
    sleep = min(sleep + 10, 120)

if st != "Completed":
    # Surface the service-reported reason (when present) so the failure is
    # diagnosable from the exception alone.
    reason = d.get("FailureReason")
    detail = f": {reason}" if reason else ""
    raise RuntimeError(f"AutoML V2 failed: {st} ({sec}){detail}")

# --- Deploy best candidate (multi-container aware) ---
best = d["BestCandidate"]
model_name = f"{job_name}-model"
cfg_name = f"{job_name}-cfg"

# Pass every inference container the candidate lists, so a multi-container
# candidate is registered as a whole.
inference_containers = best["InferenceContainers"]
sm.create_model(
    ModelName=model_name,
    Containers=inference_containers,
    ExecutionRoleArn=role,
)

# One production variant serving the model on a single instance.
all_traffic_variant = {
    "VariantName": "AllTraffic",
    "ModelName": model_name,
    "InstanceType": instance_type,
    "InitialInstanceCount": 1,
}
sm.create_endpoint_config(
    EndpointConfigName=cfg_name,
    ProductionVariants=[all_traffic_variant],
)

# Create or update endpoint
# DescribeEndpoint reports a missing endpoint as a generic ClientError with
# code "ValidationException" ("Could not find endpoint ..."), not as the
# modeled ResourceNotFound exception, so the error code and message are
# inspected instead of relying on the exception class.
try:
    sm.describe_endpoint(EndpointName=endpoint_name)
except sm.exceptions.ClientError as err:
    error = err.response.get("Error", {})
    not_found = error.get("Code") == "ResourceNotFound" or (
        error.get("Code") == "ValidationException"
        and "could not find" in error.get("Message", "").lower()
    )
    if not not_found:
        raise  # Unrelated failure (permissions, throttling, ...): surface it.
    endpoint_exists = False
else:
    endpoint_exists = True

# Create/update run outside the try block so their own errors are never
# mistaken for "endpoint does not exist".
if endpoint_exists:
    print("Updating endpoint:", endpoint_name)
    sm.update_endpoint(EndpointName=endpoint_name, EndpointConfigName=cfg_name)
else:
    print("Creating endpoint:", endpoint_name)
    sm.create_endpoint(EndpointName=endpoint_name, EndpointConfigName=cfg_name)

# Block until the endpoint reports InService.
sm.get_waiter("endpoint_in_service").wait(EndpointName=endpoint_name)
print("Endpoint InService:", endpoint_name)

# --- Quick sanity check: build payload in feature_columns order ---
# Stream only the header and the first non-empty data row instead of loading
# the whole training file into memory. Parsing the stream with csv.reader
# (rather than splitting on line breaks) keeps quoted multi-line fields intact.
obj2 = s3.get_object(Bucket=bucket, Key=train_key)
with io.TextIOWrapper(obj2["Body"], encoding="utf-8", newline="") as sample_stream:
    sample_reader = csv.reader(sample_stream)
    sample_header = next(sample_reader, None)
    # csv.reader yields [] for blank lines; skip those.
    sample_values = next((values for values in sample_reader if values), None)

if sample_header and sample_values:
    row = dict(zip(sample_header, sample_values))
    # csv.writer quotes values containing commas, quotes or newlines, which a
    # plain ",".join would silently turn into extra or broken columns.
    payload_buffer = io.StringIO()
    csv.writer(payload_buffer, lineterminator="\n").writerow(
        [row.get(col, "") for col in feature_columns]
    )
    payload = payload_buffer.getvalue()
    resp = rt.invoke_endpoint(EndpointName=endpoint_name, ContentType="text/csv", Body=payload)
    print("Sample prediction:", resp["Body"].read().decode("utf-8"))
