In [1]:
%pip freeze | grep sagemaker

sagemaker==2.235.2
sagemaker-core==1.0.77
sagemaker-experiments==0.1.45
sagemaker_training==4.9.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
import sagemaker
from sagemaker.estimator import Estimator
from sagemaker import get_execution_role

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [3]:
role = get_execution_role()
sess = sagemaker.Session()

## Model Train job 실행

In [4]:
# ============================================
# Estimator 생성
# ============================================
estimator = Estimator(
    # 앞서 빌드한 SageMaker 이미지 사용
    image_uri='155954279556.dkr.ecr.us-east-1.amazonaws.com/gs-automl-base-containers/lightgbm311_sm:1.0',
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',  # 또는 ml.m5.xlarge

    # 하이퍼파라미터 (선택적)
    hyperparameters={
        "table_name": "automl-lightgbm-experiment",
        "project_hashkey": "2ee07a49",
        "experiment_hashkey": "1cbd8309",
        "dataset_table_name": "automl-dataset",
        "username": "hjsong@gsretail.com", 
        "job_type": "training",
        "task_token": "1234",
    },

    base_job_name='train-titanic-lightgbm',
    sagemaker_session=sess,

    # 태그
    tags=[
        {'Key': 'Environment', 'Value': 'dev'},
        {'Key': 'Project', 'Value': 'automl'},
        {'Key': 'Owner', 'Value': 'hjsong'},
        {'Key': 'CostCenter', 'Value': 'gs-retail'}
    ],

    # 출력 경로
    output_path=f's3://retail-mlops-edu-2026/edu-2w/hjsong/output',
)


In [5]:
# ============================================
# 학습 데이터 준비
# ============================================
# 학습 데이터를 S3에 업로드
train_s3_path = sess.upload_data(
    path='train.csv',
    bucket='retail-mlops-edu-2026',
    key_prefix='edu-2w/hjsong/input'
)

print(f"Training data uploaded to: {train_s3_path}")

Training data uploaded to: s3://retail-mlops-edu-2026/edu-2w/hjsong/input/train.csv


In [6]:
# ============================================
# Training Job 실행
# ============================================
try:
    estimator.fit({
        'training': train_s3_path
    })

    print("=" * 60)
    print("✅ Training Job 완료!")
    print(f"   Job Name: {estimator.latest_training_job.name}")
    print(f"   Model Artifact: {estimator.model_data}")
    print("=" * 60)

except Exception as e:
    print(f"❌ Training Job 실패: {e}")

INFO:sagemaker:Creating training-job with name: train-titanic-lightgbm-2026-02-26-14-28-23-007


2026-02-26 14:28:24 Starting - Starting the training job...
2026-02-26 14:28:39 Starting - Preparing the instances for training...
2026-02-26 14:29:03 Downloading - Downloading input data...
2026-02-26 14:29:33 Downloading - Downloading the training image.....[34m2026-02-26 14:30:32,494 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2026-02-26 14:30:32,495 sagemaker-training-toolkit INFO     Failed to parse hyperparameter dataset_table_name value automl-dataset to Json.[0m
[34mReturning the value itself[0m
[34m2026-02-26 14:30:32,495 sagemaker-training-toolkit INFO     Failed to parse hyperparameter experiment_hashkey value 1cbd8309 to Json.[0m
[34mReturning the value itself[0m
[34m2026-02-26 14:30:32,495 sagemaker-training-toolkit INFO     Failed to parse hyperparameter job_type value training to Json.[0m
[34mReturning the value itself[0m
[34m2026-02-26 14:30:32,495 sagemaker-training-toolkit INFO     Failed to parse hyperparam

## 결과 확인

In [8]:
# ============================================================
# Step 3: 결과 노트북 다운로드 및 확인
# ============================================================

import boto3
import os
import pandas as pd

s3 = boto3.client('s3')

# ============================================
# 실행된 노트북 다운로드
# ============================================
bucket = 'retail-mlops-edu-2026'
key = 'edu-2w/hjsong/output/train_titanic_lightgbm_output.ipynb'
local_path = 'titanic_output.ipynb'

s3.download_file(bucket, key, local_path)

print(f"✅ 결과 노트북 다운로드 완료: {local_path}")
print(f"   Jupyter에서 열어서 확인하세요!")

# ============================================
# 모델 다운로드
# ============================================
print(estimator.latest_training_job.name)
model_key = f"edu-2w/hjsong/output/{estimator.latest_training_job.name}/output/model.tar.gz"
model_local = 'model.tar.gz'
s3.download_file(bucket, model_key, model_local)

# 압축 해제
import tarfile
with tarfile.open(model_local, 'r:gz') as tar:
    tar.extractall('model')

print(f"✅ 모델 다운로드 완료: ./model/")

# ============================================
# 모델 로드 및 테스트
# ============================================
import joblib

model = joblib.load('model/titanic_model.joblib')

# 테스트 데이터로 예측
val_key = "edu-2w/hjsong/data/val/validation.csv"
val_local = "test.csv"
s3.download_file(bucket, val_key, val_local)
test_df = pd.read_csv('test.csv')
# ... 전처리
predictions = model.predict(test_df)

print(f"✅ 예측 완료: {len(predictions)} samples")


✅ 결과 노트북 다운로드 완료: titanic_output.ipynb
   Jupyter에서 열어서 확인하세요!
train-titanic-lightgbm-2026-02-26-14-28-23-007
✅ 모델 다운로드 완료: ./model/


[LightGBM] [Fatal] The number of features in data (12) is not the same as it was in training data (11).
You can set ``predict_disable_shape_check=true`` to discard this error, but please be aware what you are doing.
