In [1]:
# Show which SageMaker-related packages (and versions) are installed in this kernel's environment.
%pip freeze | grep sagemaker

sagemaker==2.235.2
sagemaker-core==1.0.77
sagemaker-experiments==0.1.45
sagemaker_training==4.9.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
from sagemaker.estimator import Estimator
from sagemaker.session import Session
from sagemaker import get_execution_role

  domain: The machine learning domain of the model and its components. Valid Values: COMPUTER_VISION \| NATURAL_LANGUAGE_PROCESSING \| MACHINE_LEARNING
  schedule_expression: A cron expression that describes details about the monitoring schedule. The supported cron expressions are:   If you want to set the job to start every hour, use the following:  Hourly: cron(0 \* ? \* \* \*)    If you want to start the job daily:  cron(0 [00-23] ? \* \* \*)    If you want to run the job one time, immediately, use the following keyword:  NOW    For example, the following are valid cron expressions:   Daily at noon UTC: cron(0 12 ? \* \* \*)    Daily at midnight UTC: cron(0 0 ? \* \* \*)    To support running every 6, 12 hours, the following are also supported:  cron(0 [00-23]/[01-24] ? \* \* \*)  For example, the following are valid cron expressions:   Every 12 hours, starting at 5pm UTC: cron(0 17/12 ? \* \* \*)    Every two hours starting at midnight: cron(0 0/2 ? \* \* \*)       Even though the 

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


  TRAINING_JOB_PREFIX_REGEX = "^[A-Za-z0-9\-]+$"
  EMAIL_ADDRESS_REGEX = "^[a-z0-9]+[@]\w+[.]\w{2,3}$"
  PHONE_NUMBER_REGEX = "^\+\d{1,15}$"


In [3]:
# Session and execution role that the Estimator cell passes as
# `sagemaker_session` and `role`.
sess = Session()
role = get_execution_role()

In [4]:
# Configure a single-instance training job that runs the custom container image.
estimator = Estimator(
    image_uri="155954279556.dkr.ecr.us-east-1.amazonaws.com/gs-automl-base-containers/tabular312_sm:1.0",
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    # Hyperparameters handed to the container; all values are plain strings.
    hyperparameters={
        "table_name": "automl-classification-experiment",
        "project_hashkey": "2ee07a49",
        "experiment_hashkey": "1cbd8309",
        "dataset_table_name": "automl-dataset",
        "dataset_profile_table_name": "automl-dataset-profile-experiment-result",
        "model_repo_table_name": "automl-model-repo",
        "model_experiment_result_table_name": "automl-classification-experiment",
        "username": "sean@gs.co.kr",
        "job_type": "training",
        "task_token": "1234",
    },
    base_job_name="custom-training",
    sagemaker_session=sess,
    # Tags (needed to satisfy the SCP requirements).
    tags=[
        {"Key": tag_key, "Value": tag_value}
        for tag_key, tag_value in (
            ("Environment", "dev"),
            ("Project", "automl"),
            ("Owner", "sean"),
            ("CostCenter", "gs-retail"),
        )
    ],
    # Write output to an existing bucket (avoids creating a new bucket).
    output_path="s3://retail-mlops-edu-202602/output",
)

# Launch the training job. A ValueError raised by fit() is printed rather than
# propagated, so the cell finishes and the following cells can still run.
try:
    estimator.fit()
except ValueError as err:
    print(err)

INFO:sagemaker:Creating training-job with name: custom-training-2026-02-20-02-51-20-755


2026-02-20 02:51:22 Starting - Starting the training job...
2026-02-20 02:51:37 Starting - Preparing the instances for training...
2026-02-20 02:52:23 Downloading - Downloading the training image........[34m2026-02-20 02:53:32,306 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2026-02-20 02:53:32,307 sagemaker-training-toolkit INFO     Failed to parse hyperparameter dataset_profile_table_name value automl-dataset-profile-experiment-result to Json.[0m
[34mReturning the value itself[0m
[34m2026-02-20 02:53:32,307 sagemaker-training-toolkit INFO     Failed to parse hyperparameter dataset_table_name value automl-dataset to Json.[0m
[34mReturning the value itself[0m
[34m2026-02-20 02:53:32,307 sagemaker-training-toolkit INFO     Failed to parse hyperparameter experiment_hashkey value 1cbd8309 to Json.[0m
[34mReturning the value itself[0m
[34m2026-02-20 02:53:32,307 sagemaker-training-toolkit INFO     Failed to parse hyperparameter jo

In [5]:
# Read the name of the latest training job recorded on the estimator.
training_job_name = estimator.latest_training_job.name
print(f"Training Job Name: {training_job_name}")

# Derive the CloudWatch log group and log stream names for that job.
# NOTE: the stream listed by describe_log_streams carries an extra suffix after
# "algo-1", so `log_stream` is effectively a stream-name prefix.
log_group = "/aws/sagemaker/TrainingJobs"
log_stream = f"{training_job_name}/algo-1"

print(f"Log Group: {log_group}")
print(f"Log Stream: {log_stream}")

Training Job Name: custom-training-2026-02-10-15-45-43-653
Log Group: /aws/sagemaker/TrainingJobs
Log Stream: custom-training-2026-02-10-15-45-43-653/algo-1


In [6]:
import boto3

# CloudWatch Logs client. The region is taken from the SageMaker session that
# launched the training job instead of being hard-coded, so the client always
# queries the region the job was submitted to.
logs_client = boto3.client("logs", region_name=sess.boto_region_name)

In [7]:
# List the log streams of the training job. `log_group` and `log_stream` are the
# values built in the previous cell; reusing them keeps this lookup in sync with
# the names printed there instead of repeating the literals.
streams = logs_client.describe_log_streams(
    logGroupName=log_group,
    logStreamNamePrefix=log_stream,
)

for stream in streams["logStreams"]:
    print(stream["logStreamName"])  # full stream name, including its unique suffix


custom-training-2026-02-10-15-45-43-653/algo-1-1770738388
