# 1.2 Amazon SageMaker Training Warm Pool

## 학습 작업의 실행 노트북 개요

- SageMaker Warm Pool 기능을 사용하여 학습합니다.
    - [Train Using SageMaker Managed Warm Pools](https://docs.aws.amazon.com/sagemaker/latest/dg/train-warm-pools.html)

### 작업 실행 시 필요 라이브러리 import

In [1]:
import boto3
import sagemaker

### Bucket 정의

In [2]:
# Toggle: True -> use the SageMaker session's default bucket,
#         False -> use the bucket name typed into the placeholder below.
use_default_bucket = True

bucket = (
    sagemaker.session.Session().default_bucket()
    if use_default_bucket
    else '<Type your bucket name here>'
)

print("bucket: ", bucket)

bucket:  sagemaker-us-east-1-057716757052


###  훈련 잡에 사용할 SageMaker Role 정의 
- 로컬 머신에서 사용할 경우에는 아래 셀에서 `use_local_machine = True` 로 설정하고, `role` 에 SageMaker Execution Role ARN 을 넣어 주세요.

In [3]:
# Set to True when running this notebook on a local machine; the execution
# role ARN must then be supplied explicitly in the branch below.
use_local_machine = False

if use_local_machine:
    # Replace the placeholder with your SageMaker execution role ARN,
    # e.g. 'arn:aws:iam::<account-id>:role/<role-name>'.
    role = '<Type Role ARN here>'
    # Fail fast with a clear message: previously this branch left `role`
    # unbound, which only surfaced later as a NameError when the estimator
    # was created.
    if not role.startswith('arn:'):
        raise ValueError(
            "use_local_machine is True: set `role` to your SageMaker "
            "execution role ARN before continuing."
        )
else:
    # Look up the execution role through the SageMaker SDK.
    role = sagemaker.get_execution_role()


### 하이퍼파라미터 정의

In [4]:
# Hyperparameters handed to the XGBoost estimator; every value is kept as a string.
hyperparameters = dict(
    scale_pos_weight="29",
    max_depth="3",
    eta="0.2",
    objective="binary:logistic",
    num_round="100",
)

### 학습 실행 작업 정의

In [5]:
# Instance type used for the training job.
instance_type = "ml.m5.large"

# S3 prefix passed to the estimator as its output location.
output_path = f"s3://{bucket}/xgboost/output"

# Session object passed to the estimator.
sagemaker_session = sagemaker.session.Session()



In [18]:
from sagemaker.xgboost.estimator import XGBoost

# First training job. keep_alive_period_in_seconds requests a managed warm
# pool: the provisioned instance is retained for that long after the job
# finishes so a following job can reuse it.
estimator = XGBoost(
    # --- training code ---
    source_dir="src",
    entry_point="xgboost_starter_script.py",
    framework_version="1.3-1",
    hyperparameters=hyperparameters,
    # --- infrastructure ---
    role=role,
    sagemaker_session=sagemaker_session,
    instance_type=instance_type,
    instance_count=1,
    keep_alive_period_in_seconds=1800,  # 30 minutes
    # --- artifacts ---
    output_path=output_path,
)

### 학습 데이터셋을 S3 버킷으로 복사

In [20]:
# S3 prefix that will hold the training dataset.
data_path=f's3://{bucket}/xgboost/dataset'
# IPython shell escape: sync the local dataset directory to the S3 prefix
# ($data_path is expanded from the Python variable above).
!aws s3 sync ../data/dataset/ $data_path    
# The S3 URI is what gets passed to estimator.fit() as the training input.
inputs = data_path
    
# NOTE(review): the label says "fsx_path", but the value printed is an S3 URI —
# presumably left over from an FSx variant of this notebook; confirm and rename.
print("input for fsx_path: ", inputs)                

input for fsx_path:  s3://sagemaker-us-east-1-057716757052/xgboost/dataset


### 학습 실행

In [21]:
# Start the training job without blocking the notebook (wait=False); the
# dataset location is passed under the "inputdata" input name.
estimator.fit(
    inputs={"inputdata": inputs},
    wait=False,
)

In [22]:
%%time 

# Display the logs of the estimator's latest training job; %%time reports how
# long this cell took to run.
estimator.logs()

2023-05-08 07:47:03 Starting - Starting the training job...ProfilerReport-1683532023: InProgress
......
2023-05-08 07:48:27 Downloading - Downloading input data...
2023-05-08 07:49:04 Training - Downloading the training image.....[34m[2023-05-08 07:49:44.527 ip-10-0-80-108.ec2.internal:6 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2023-05-08 07:49:44.551 ip-10-0-80-108.ec2.internal:6 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2023-05-08:07:49:44:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2023-05-08:07:49:44:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2023-05-08:07:49:44:INFO] Invoking user training script.[0m
[34m[2023-05-08:07:49:44:INFO] Installing module with the following command:[0m
[34m/miniconda3/bin/python3 -m pip install . [0m
[34mProcessing /opt/ml/code
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'[0m
[34mBuilding 

### Second Training Job using Warm Pool

In [12]:
# Hyperparameters for the second training job; max_depth is "5" here
# (the first job used "3"), everything else is unchanged.
hyperparameters = dict(
    scale_pos_weight="29",
    max_depth="5",
    eta="0.2",
    objective="binary:logistic",
    num_round="100",
)

In [15]:
from sagemaker.xgboost.estimator import XGBoost

# Second training job: same configuration as the first estimator apart from
# the updated hyperparameters. The keep-alive period again requests a managed
# warm pool for the instance.
estimator = XGBoost(
    # --- training code ---
    source_dir="src",
    entry_point="xgboost_starter_script.py",
    framework_version="1.3-1",
    hyperparameters=hyperparameters,
    # --- infrastructure ---
    role=role,
    sagemaker_session=sagemaker_session,
    instance_type=instance_type,
    instance_count=1,
    keep_alive_period_in_seconds=1800,  # 30 minutes
    # --- artifacts ---
    output_path=output_path,
)

In [16]:
# Start the second training job without blocking the notebook (wait=False);
# the dataset location is passed under the "inputdata" input name.
estimator.fit(
    inputs={"inputdata": inputs},
    wait=False,
)

In [17]:
%%time 

# Display the logs of the second training job; %%time reports how long this
# cell took to run.
estimator.logs()

2023-05-08 07:28:15 Starting - Starting the training job...
2023-05-08 07:28:37 Starting - Preparing the instances for trainingProfilerReport-1683530893: InProgress
......
2023-05-08 07:29:37 Downloading - Downloading input data...
2023-05-08 07:29:57 Training - Downloading the training image...
2023-05-08 07:30:38 Training - Training image download completed. Training in progress..[34m[2023-05-08 07:30:42.110 ip-10-2-196-164.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2023-05-08 07:30:42.134 ip-10-2-196-164.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2023-05-08:07:30:42:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2023-05-08:07:30:42:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2023-05-08:07:30:42:INFO] Invoking user training script.[0m
[34m[2023-05-08:07:30:42:INFO] Installing module with the following command:[0m
[34m/miniconda3/bin/python3 -m pip install