# 2.0 Amazon SageMaker Training in VPC with S3 

- SageMaker Python SDK - VPC
    - https://sagemaker.readthedocs.io/en/v2.49.2/overview.html?highlight=vpc%20deploy#secure-training-and-inference-with-vpc

## Import libraries

In [1]:
import boto3
import sagemaker

### Bucket 정의

In [2]:
# Toggle: True -> use the SageMaker session's default bucket,
#         False -> use the bucket name typed into the placeholder below.
use_default_bucket = True

bucket = (
    sagemaker.session.Session().default_bucket()
    if use_default_bucket
    else '<Type your bucket name here>'
)

print("bucket: ", bucket)

bucket:  sagemaker-ap-northeast-2-503161638684


###  훈련 잡에 사용할 SageMaker Role 정의 
- 로컬 머신에서 사용할 경우에는 아래의 주석을 해제하시고, SageMaker Execution Role ARN 을 넣어 주세요.

In [3]:
# Set to True when running outside SageMaker (e.g. on a local machine), where
# the execution role cannot be discovered automatically.
use_local_machine = False
# use_local_machine = True

if use_local_machine:
    # Uncomment the next line and fill in the SageMaker execution role ARN.
    # role = '<Type Role ARN here>'

    # Fail fast with a clear message instead of leaving `role` unbound, which
    # would otherwise surface later as a NameError when the estimator is built.
    if not globals().get("role"):
        raise ValueError(
            "use_local_machine is True but `role` is not set; "
            "provide the SageMaker execution role ARN."
        )
else:
    # Inside SageMaker (notebook instance / Studio) the role is looked up
    # from the current environment.
    role = sagemaker.get_execution_role()


In [5]:
import boto3

# Pin both the boto3 session and the SageMaker session built on top of it to
# the Seoul region, so every later SDK call targets ap-northeast-2.
boto_session = boto3.Session(region_name='ap-northeast-2')
sagemaker_session = sagemaker.session.Session(boto_session=boto_session)


### Define hyperparameters

In [6]:
# XGBoost training hyperparameters; every value is given as a string.
hyperparameters = {
    # Weight applied to the positive class.
    # NOTE(review): 29 presumably mirrors the dataset's negative/positive ratio - confirm.
    "scale_pos_weight": "29",
    "max_depth": "3",
    "eta": "0.2",
    "objective": "binary:logistic",
    "num_round": "100",
}

### Define instance type 

In [7]:
# Compute used for the training job (and reused for the endpoint further down).
instance_type = "ml.m5.large"
instance_count = 1


### 학습 데이터셋 정의

In [8]:
# S3 prefix under the chosen bucket where the training dataset is stored.
data_path=f's3://{bucket}/xgboost/dataset'
# IPython shell escape: sync the local ../data/dataset/ directory to that prefix
# ($data_path is expanded by IPython from the Python variable above).
!aws s3 sync ../data/dataset/ $data_path    


# Channel name -> S3 URI mapping; handed to estimator.fit() later in this notebook.
inputs = {'train': data_path}
print("inputs: \n", inputs)


inputs: 
 {'train': 's3://sagemaker-ap-northeast-2-503161638684/xgboost/dataset'}


## Setup VPC Endpoint

In [32]:
# Replace the IDs below with your own:
#   subnet_id         -> list of subnet IDs, e.g. ['<Type your subnet_id>']
#   security_group_id -> a single ID,        e.g. '<Type your security_group_id>'

# Subnets the training job (and later the endpoint) will be attached to.
subnet_id = [
    'subnet-06321ef1e605f09ca',
    'subnet-02344e34c84f2ea59',
]
subnets = subnet_id

# The SDK expects a list of security groups, so wrap the single ID.
security_group_id = 'sg-0a651a54a6294d84c'
security_group_ids = [security_group_id]

print("security_group_ids: ", security_group_ids)
print("subnets: ", subnets)


security_group_ids:  ['sg-0a651a54a6294d84c']
subnets:  ['subnet-06321ef1e605f09ca', 'subnet-02344e34c84f2ea59']


In [33]:
from sagemaker.xgboost.estimator import XGBoost

# Script-mode XGBoost estimator whose training job runs inside the VPC
# described by `subnets` / `security_group_ids`.
estimator = XGBoost(
    # Training code: entry script located in the `src` directory.
    source_dir='src',
    entry_point="xgboost_fsx_luster_script.py",
    framework_version="1.3-1",
    hyperparameters=hyperparameters,
    # Compute.
    instance_type=instance_type,
    instance_count=instance_count,
    # Identity and session (region-pinned session created earlier).
    role=role,
    sagemaker_session=sagemaker_session,
    # VPC networking.
    subnets=subnets,
    security_group_ids=security_group_ids,
)

INFO:sagemaker.image_uris:Ignoring unnecessary Python version: py3.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: ml.m5.large.


In [34]:
# Launch the training job on the `train` channel; wait=False returns right
# after the job is created instead of blocking until it finishes.
estimator.fit(inputs=inputs, wait=False)

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2023-06-07-23-19-22-713


In [35]:
# Show the training job's logs in the cell output; needed here because
# fit() above was started with wait=False and therefore printed none.
estimator.logs()

2023-06-07 23:19:24 Starting - Starting the training job...
2023-06-07 23:19:39 Starting - Preparing the instances for training......
2023-06-07 23:20:35 Downloading - Downloading input data...
2023-06-07 23:21:15 Training - Downloading the training image...
2023-06-07 23:21:51 Training - Training image download completed. Training in progress.[34m[2023-06-07 23:21:56.206 ip-172-30-0-62.ap-northeast-2.compute.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2023-06-07 23:21:56.232 ip-172-30-0-62.ap-northeast-2.compute.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2023-06-07:23:21:56:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2023-06-07:23:21:56:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2023-06-07:23:21:56:INFO] Invoking user training script.[0m
[34m[2023-06-07:23:21:56:INFO] Installing module with the following command:[0m
[34m/miniconda3/bin/python3 -m pip install . 

## 모델 배포
- 위에서 Estimator 에 설정한 VPC 구성(subnets, security_group_ids)이 배포되는 모델에도 동일하게 적용됩니다.

In [36]:
# Host the trained model on a real-time endpoint with a single instance of the
# same type that was used for training.
# NOTE(review): the recorded run of this cell failed with "Unable to locate at
# least 2 availability zone(s) with the requested instance type ml.m5.large
# that overlap with SageMaker subnets" - presumably the configured subnets must
# span at least two AZs that offer this instance type; verify the subnet setup.
vpc_predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
)

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2023-06-07-23-23-48-649
INFO:sagemaker:Creating endpoint-config with name sagemaker-xgboost-2023-06-07-23-23-48-649
INFO:sagemaker:Creating endpoint with name sagemaker-xgboost-2023-06-07-23-23-48-649


*

UnexpectedStatusException: Error hosting endpoint sagemaker-xgboost-2023-06-07-23-23-48-649: Failed. Reason: Unable to locate at least 2 availability zone(s) with the requested instance type ml.m5.large that overlap with SageMaker subnets.