# 2.0 Amazon SageMaker Training with S3

## Import libraries

In [1]:
import boto3
import sagemaker

### SageMaker session and role

In [31]:
# Pin the AWS region explicitly so the SageMaker session does not depend on
# whatever default region the notebook environment happens to be configured with.
# (boto3 and sagemaker are already imported in the first cell.)
boto_session = boto3.Session(region_name='us-east-1')
sagemaker_session = sagemaker.session.Session(boto_session)

# Resolve the execution role through the session created above instead of
# letting get_execution_role() build a second, default-region session.
role = sagemaker.get_execution_role(sagemaker_session=sagemaker_session)
print("role: ", role)

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


role:  arn:aws:iam::189546603447:role/mask-rcnn-sm-nb-ExecutionRole-RCM1ONA959J3


### Define hyperparameters

In [19]:
# Hyperparameters forwarded to the training script.
# All values are given as strings, exactly as they are handed to the estimator.
hyperparameters = {
    "scale_pos_weight": "29",          # XGBoost weight applied to the positive class
    "max_depth": "3",                  # maximum depth of each tree
    "eta": "0.2",                      # learning rate (step-size shrinkage)
    "objective": "binary:logistic",    # binary classification objective
    "num_round": "100",                # number of boosting rounds
}

### Define instance type 

In [20]:
# Size and type of the training cluster.
instance_count = 1
instance_type = "ml.m5.large"
# To switch to local mode, set instance_type to "local" or "local_gpu" instead.

# Upper bound on training time, in seconds (one hour).
max_run = 60 * 60

# max_wait is only given a value (one hour, in seconds) when spot instances are
# requested; otherwise it stays None.
use_spot_instances = False
max_wait = 60 * 60 if use_spot_instances else None

In [21]:
# Choose the session that matches the selected instance type.
if instance_type in ('local', 'local_gpu'):
    # Local mode: imported here because it is only needed on this branch.
    from sagemaker.local import LocalSession

    sagemaker_session = LocalSession()
    # NOTE(review): 'local_code' presumably makes local mode use the source
    # directory in place rather than uploading it to S3 -- confirm against the SDK docs.
    sagemaker_session.config = {'local': {'local_code': True}}
else:
    # Reuse the region-pinned boto3 session created earlier in the notebook.
    # A bare Session() here would silently discard region_name='us-east-1'.
    sagemaker_session = sagemaker.session.Session(boto_session)

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


## S3 input location

In [22]:
# Default S3 bucket of the active SageMaker session; the cells below use it as the
# root for both the training data and the training output.
s3_bucket = sagemaker_session.default_bucket()

In [23]:
train_path=f's3://{s3_bucket}/xgboost/dataset'
!aws s3 sync ../data/dataset/ $train_path

In [24]:
# Mapping of channel name to S3 location, passed to estimator.fit() later on.
inputs = dict(train=train_path)
print("inputs: \n", inputs)


inputs: 
 {'train': 's3://sagemaker-us-east-1-189546603447/xgboost/dataset'}


## S3 output location

In [25]:
# Training output is written under this key prefix of the session bucket.
prefix = "xgboost/output"

s3_output_location = f"s3://{s3_bucket}/{prefix}"
print("s3_output_location: ", s3_output_location)

s3_output_location:  s3://sagemaker-us-east-1-189546603447/xgboost/output


## Setup VPC Endpoint

In [26]:

# Subnet and security group used for the training job.
# These IDs are specific to one AWS account/VPC -- replace them with your own.
subnet_id = 'subnet-f1067697'
security_group_id = 'sg-01d2d82fa2227f449'

# The estimator is given lists, so wrap the single IDs.
security_group_ids = [security_group_id]
subnets = [subnet_id]

print("security_group_ids: ", security_group_ids)
print("subnets: ", subnets)


security_group_ids:  ['sg-01d2d82fa2227f449']
subnets:  ['subnet-f1067697']


In [27]:
from sagemaker.xgboost.estimator import XGBoost

# Managed warm pools (keep_alive_period_in_seconds) cannot be combined with managed
# spot training, so only request the 30-minute warm pool for on-demand jobs.
# With the current setting (use_spot_instances = False) this is still 1800.
keep_alive_period_in_seconds = None if use_spot_instances else 1800

# XGBoost framework estimator running the script in src/ with the settings
# defined in the cells above.
estimator = XGBoost(
    entry_point="xgboost_fsx_luster_script.py",
    source_dir='src',
    hyperparameters=hyperparameters,
    role=role,
    sagemaker_session=sagemaker_session,
    instance_count=instance_count,
    instance_type=instance_type,
    framework_version="1.3-1",
    max_run=max_run,
    use_spot_instances=use_spot_instances,  # use spot instances when enabled
    max_wait=max_wait,                      # None unless spot instances are enabled
    output_path=s3_output_location,
    keep_alive_period_in_seconds=keep_alive_period_in_seconds,
    subnets=subnets,
    security_group_ids=security_group_ids,
)

INFO:sagemaker.image_uris:Ignoring unnecessary Python version: py3.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: ml.m5.large.


In [28]:
# Show the channel mapping once more right before it is passed to fit().
inputs

{'train': 's3://sagemaker-us-east-1-189546603447/xgboost/dataset'}

In [29]:
# Launch the training job without blocking the notebook (wait=False);
# the logs are fetched separately with estimator.logs().
estimator.fit(
    inputs=inputs,
    wait=False,
)

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2023-04-13-13-58-29-945


In [30]:
# Show the logs of the training job started above (fit() was called with wait=False).
estimator.logs()

2023-04-13 13:58:31 Starting - Starting the training job...
2023-04-13 13:58:50 Downloading - Downloading input data
2023-04-13 13:58:50 Training - Training image download completed. Training in progress.[34m[2023-04-13 13:58:52.627 ip-172-31-4-95.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2023-04-13 13:58:52.665 ip-172-31-4-95.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2023-04-13:13:58:52:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2023-04-13:13:58:52:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2023-04-13:13:58:52:INFO] Invoking user training script.[0m
[34m[2023-04-13:13:58:52:INFO] Module xgboost_fsx_luster_script does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m[2023-04-13:13:58:52:INFO] Generating setup.cfg[0m
[34m[2023-04-13:13:58:52:INFO] Generating MANIFEST.in[0m
[34m[2023-04-13:13:58:52:INFO] Installing module with the follo