In [None]:
import boto3, json
# Look up the Autopilot (AutoML V2) job and report its status and failure details.
sm = boto3.client("sagemaker")

JOB = "your-autopilot-v2-job-name"
d = sm.describe_auto_ml_job_v2(AutoMLJobName=JOB)

# The primary status is indexed directly (KeyError if absent); every other
# field is read with .get() so a missing key prints None / an empty list.
status_fields = (d["AutoMLJobStatus"], d.get("AutoMLJobSecondaryStatus"))
print("Status:", *status_fields)

failure_reason = d.get("FailureReason")
print("FailureReason:", failure_reason)

partial_failures = d.get("PartialFailureReasons", [])
print("PartialFailureReasons:", json.dumps(partial_failures, indent=2))

In [None]:
# Print every failed candidate of the AutoML job together with the training
# job(s) behind it.
#
# CandidateStepType is reported by the API as a resource-type string
# ("AWS::SageMaker::TrainingJob"), not "TRAINING_JOB", so the filter must
# compare against that value or no training job is ever printed.
TRAINING_STEP_TYPE = "AWS::SageMaker::TrainingJob"

# ListCandidatesForAutoMLJob is paginated: keep requesting pages until the
# response no longer carries a NextToken, otherwise later failures are missed.
list_kwargs = {"AutoMLJobName": JOB, "StatusEquals": "Failed"}
while True:
    cands = sm.list_candidates_for_auto_ml_job(**list_kwargs)
    for c in cands.get("Candidates", []):
        print("Candidate:", c["CandidateName"])
        for step in c.get("CandidateSteps", []):
            if step["CandidateStepType"] == TRAINING_STEP_TYPE:
                # The training job name is the last path segment of the step ARN.
                print("  TrainingJob:", step["CandidateStepArn"].split("/")[-1])

    next_token = cands.get("NextToken")
    if not next_token:
        break
    list_kwargs["NextToken"] = next_token

In [None]:
import boto3, json, urllib.parse, os

# One-stop diagnostics for an Autopilot (AutoML V2) job: overall status,
# failure reasons, the training jobs of failed candidates, and CloudWatch
# console links for the first few of those training jobs.
sm   = boto3.client("sagemaker")
logs = boto3.client("logs")
region = boto3.Session().region_name

JOB = "your-autopilot-v2-job-name"  # <-- put the running/just-failed job here

# CandidateStepType is reported as a resource-type string, not "TRAINING_JOB".
TRAINING_STEP_TYPE = "AWS::SageMaker::TrainingJob"
MAX_LOG_LINKS = 3  # only print links for the first few failed training jobs

d = sm.describe_auto_ml_job_v2(AutoMLJobName=JOB)
print("== Job Status ==")
print(d["AutoMLJobStatus"], "-", d.get("AutoMLJobSecondaryStatus"))
print("\n== FailureReason ==")
print(d.get("FailureReason", "(none yet)"))
print("\n== PartialFailureReasons ==")
print(json.dumps(d.get("PartialFailureReasons", []), indent=2))

print("\n== Failed candidate training jobs ==")
tjobs = []
# The listing is paginated: follow NextToken until it is absent so that
# failed candidates beyond the first page are not silently dropped.
list_kwargs = {"AutoMLJobName": JOB, "StatusEquals": "Failed"}
while True:
    failed = sm.list_candidates_for_auto_ml_job(**list_kwargs)
    for c in failed.get("Candidates", []):
        cname = c["CandidateName"]
        for step in c.get("CandidateSteps", []):
            if step["CandidateStepType"] == TRAINING_STEP_TYPE:
                # The training job name is the last path segment of the step ARN.
                tname = step["CandidateStepArn"].split("/")[-1]
                tjobs.append(tname)
                print(f"- Candidate: {cname}  TrainingJob: {tname}")

    next_token = failed.get("NextToken")
    if not next_token:
        break
    list_kwargs["NextToken"] = next_token

if not tjobs:
    print("(none yet)")

# Quick CloudWatch link(s) you can click
if tjobs:
    print("\n== Open CloudWatch logs ==")
    for t in tjobs[:MAX_LOG_LINKS]:
        # Training logs live in the /aws/sagemaker/TrainingJobs log group. Each
        # stream is named "<training-job-name>/algo-<n>-<timestamp>", so there
        # is no stream named exactly after the job; link to the log group
        # filtered by the job-name prefix instead of to a single stream.
        stream_prefix = urllib.parse.quote(t, safe="")
        link = (
            f"https://{region}.console.aws.amazon.com/cloudwatch/home"
            f"?region={region}#logsV2:log-groups/log-group/$252Faws$252Fsagemaker$252FTrainingJobs"
            f"$3FlogStreamNameFilter$3D{stream_prefix}"
        )
        print(f"{t}: {link}")


In [None]:
import csv, io, boto3, collections, math

# Sanity-check the training CSV in S3: the target column must exist in the
# header and a sample of the data must contain at least two distinct classes.
s3 = boto3.client("s3")
bucket = "<your-bucket>"
key    = "<your-train.csv>"
target = "<your_target_col>"
SAMPLE_ROWS = 5000  # number of data rows (after the header) to inspect

obj = s3.get_object(Bucket=bucket, Key=key)
# - "utf-8-sig" strips a leading BOM, which would otherwise be glued onto the
#   first header name and make the target lookup fail for that column.
# - newline="" is what the csv module expects from its input stream.
# - The `with` block closes the S3 response body once the sample is read.
with io.TextIOWrapper(obj["Body"], encoding="utf-8-sig", newline="") as body:
    reader = csv.reader(body)
    # Stream only the header plus SAMPLE_ROWS data rows instead of loading the
    # whole object into memory.
    rows = [r for _, r in zip(range(SAMPLE_ROWS + 1), reader)]

if not rows:
    raise ValueError(f"s3://{bucket}/{key} is empty: no header row found")

header = rows[0]
assert target in header, f"Target '{target}' not in header {header}"
tidx = header.index(target)

classes = collections.Counter()
bad = 0  # rows with the wrong column count or an empty target value
for r in rows[1:]:
    if len(r) != len(header):
        bad += 1
        continue
    y = r[tidx]
    if y == "":
        bad += 1
    else:
        classes[y] += 1

# Report the number of rows actually inspected, not the size of the file.
print("Header OK. Sampled rows:", len(rows) - 1, "Malformed rows:", bad)
print("Class distribution (sample):", classes.most_common(10))
assert len(classes) >= 2, "Multiclass needs >= 2 classes present"
