In [1]:
# List the installed packages whose name contains "sagemaker", with their
# pinned versions, so the SDK version used by this notebook is recorded.
%pip freeze | grep sagemaker

sagemaker==2.235.2
sagemaker-core==1.0.77
sagemaker-experiments==0.1.45
sagemaker_training==4.9.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
import sagemaker
from sagemaker.estimator import Estimator
from sagemaker import get_execution_role

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [3]:
# Session object shared by the estimator and the S3 upload below.
sess = sagemaker.Session()
# Execution role handed to the estimator when the training job is created.
role = get_execution_role()

## Model Training Job 실행

In [4]:
# ============================================
# Create the Estimator
# ============================================
estimator = Estimator(
    # Use the SageMaker training image that was built earlier.
    image_uri='155954279556.dkr.ecr.us-east-1.amazonaws.com/gs-automl-base-containers/lightgbm311:1.0',

    role=role,
    instance_count=1,
    instance_type='ml.m5.large',  # or ml.m5.xlarge

    # Hyperparameters (optional). Currently disabled; uncomment the block and
    # fill in real values to pass them to the training container.
    # hyperparameters={
    #     "table_name": "automl-lightgbm-experiment",
    #     "project_hashkey": "2ee07a49",
    #     "experiment_hashkey": "1cbd8309",
    #     "dataset_table_name": "automl-dataset",
    #     "username": "<user-email>",  # personal address removed from the notebook
    #     "job_type": "training",
    #     # "task_token": "1234",
    # },

    base_job_name='train-titanic-lightgbm',
    sagemaker_session=sess,

    # Tags attached to the training job.
    tags=[
        {'Key': 'Environment', 'Value': 'dev'},
        {'Key': 'Project', 'Value': 'automl'},
        {'Key': 'Owner', 'Value': 'hjsong'},
        {'Key': 'CostCenter', 'Value': 'gs-retail'},
    ],

    # S3 location for the job output. A plain string: the original used an
    # f-string prefix although the literal has no placeholders.
    output_path='s3://retail-mlops-edu-2026-hjsong/edu-202602-staff/titanic/output',
)


In [5]:
# ============================================
# Prepare the training data
# ============================================
# Push the local CSV to S3; the returned S3 URI is what the training job
# receives as its input.
train_s3_path = sess.upload_data(
    bucket='retail-mlops-edu-2026-hjsong',
    key_prefix='edu-202602-staff/titanic/data',
    path='train.csv',
)

print(f"Training data uploaded to: {train_s3_path}")

Training data uploaded to: s3://retail-mlops-edu-2026-hjsong/edu-202602-staff/titanic/data/train.csv


In [6]:
# ============================================
# Run the training job
# ============================================
try:
    # Pass the uploaded data to the job under the 'training' input key.
    estimator.fit({'training': train_s3_path})
except Exception as e:
    print(f"❌ Training Job 실패: {e}")
    # Re-raise instead of swallowing the error: the cells below download the
    # artifacts of this job, so a "Run All" must stop here when training fails.
    raise
else:
    # Report success only when fit() returned without raising, so an error
    # while reading the job attributes is not reported as a training failure.
    print("=" * 60)
    print("✅ Training Job 완료!")
    print(f"   Job Name: {estimator.latest_training_job.name}")
    print(f"   Model Artifact: {estimator.model_data}")
    print("=" * 60)

INFO:sagemaker:Creating training-job with name: train-titanic-lightgbm-2026-02-26-08-24-44-850


2026-02-26 08:24:47 Starting - Starting the training job...
2026-02-26 08:25:03 Starting - Preparing the instances for training...
2026-02-26 08:25:27 Downloading - Downloading input data...
2026-02-26 08:25:52 Downloading - Downloading the training image......
2026-02-26 08:27:14 Training - Training image download completed. Training in progress.
2026-02-26 08:27:14 Uploading - Uploading generated training model[34m2026-02-26 08:27:07,056 sagemaker-training-toolkit ERROR    Reporting training FAILURE[0m
[34m2026-02-26 08:27:07,056 sagemaker-training-toolkit ERROR    Framework Error: [0m
[34mTraceback (most recent call last):
  File "/usr/local/lib/python3.11/site-packages/sagemaker_training/trainer.py", line 99, in train
    entry_point.run(
  File "/usr/local/lib/python3.11/site-packages/sagemaker_training/entry_point.py", line 97, in run
    install(name=user_entry_point, path=environment.code_dir, capture_error=capture_error)
  File "/usr/local/lib/python3.11/site-packages/sag

## 결과 확인

In [7]:
# ============================================================
# Step 3: Download and inspect the result notebook and model
# ============================================================

# All imports for this cell are grouped here; pandas was used below as `pd`
# without ever being imported.
import boto3
import os
import tarfile
from urllib.parse import urlparse

import joblib
import pandas as pd

s3 = boto3.client('s3')

# ============================================
# Download the executed notebook
# ============================================
bucket = 'retail-mlops-edu-2026-hjsong'
key = 'edu-202602-staff/titanic/output/train_titanic_lightgbm_output.ipynb'
local_path = 'titanic_output.ipynb'

s3.download_file(bucket, key, local_path)

print(f"✅ 결과 노트북 다운로드 완료: {local_path}")
print("   Jupyter에서 열어서 확인하세요!")

# ============================================
# Download the model artifact
# ============================================
# The estimator's output_path contains a key prefix
# (edu-202602-staff/titanic/output), so "<job-name>/output/model.tar.gz" alone
# is not the artifact's key. Take bucket and key from the URI the estimator
# reports instead of rebuilding the path by hand.
# NOTE(review): assumes estimator.model_data is a single "s3://..." string
# (the SDK default for a compressed model artifact) - confirm.
model_uri = urlparse(estimator.model_data)
model_bucket = model_uri.netloc
model_key = model_uri.path.lstrip('/')
model_local = 'model.tar.gz'

s3.download_file(model_bucket, model_key, model_local)

# Unpack the archive. Use the 'data' extraction filter when this Python
# provides it, so members cannot be written outside ./model.
with tarfile.open(model_local, 'r:gz') as tar:
    if hasattr(tarfile, 'data_filter'):
        tar.extractall('model', filter='data')
    else:
        tar.extractall('model')

print("✅ 모델 다운로드 완료: ./model/")

# ============================================
# Load the model and run a test prediction
# ============================================
# NOTE(review): joblib.load unpickles the file and can execute arbitrary code;
# only load artifacts produced by your own training job.
model = joblib.load('model/model.joblib')

# Predict on the test data.
test_df = pd.read_csv('test.csv')
# TODO(review): apply the same preprocessing that was used for training. The
# original cell left this step as a placeholder and never defined X_test;
# passing the raw frame through is only a stand-in until that is filled in.
X_test = test_df
predictions = model.predict(X_test)

print(f"✅ 예측 완료: {len(predictions)} samples")
