# Module 2. GluonTS Training on Amazon SageMaker
---

본 모듈에서는 Amazon SageMaker API를 호출하여 모델 훈련을 수행합니다. 노트북 실행에는 약 10분 가량 소요되며, 핸즈온 실습 시에는 25분을 권장드립니다.

Amazon SageMaker는 완전관리형 머신 러닝 서비스로 인프라 관리에 대해 걱정할 필요가 없으며, 딥러닝 프레임워크의 훈련/배포 컨테이너 이미지를 가져 와서
여러분의 스크립트 코드를 쉽게 통합할 수 있습니다.

<br>

## 1. Training script
---

아래 코드 셀은 `src` 디렉토리에 SageMaker 훈련 스크립트인 `train.py`를 저장합니다.
아래 스크립트가 이전 모듈의 코드와 대부분 일치한다는 점을 알 수 있습니다. 다시 말해, SageMaker 훈련 스크립트 파일은 기존 온프레미스에서 사용했던 Python 스크립트 파일과 크게 다르지 않으며, SageMaker 훈련 컨테이너에서 수행하기 위한 추가적인 환경 변수들만 설정하시면 됩니다.

환경 변수 설정의 code snippet은 아래와 같습니다.

```python
# SageMaker Container environment
parser.add_argument('--model_dir', type=str, default=os.environ['SM_MODEL_DIR'])
parser.add_argument('--data_dir', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
parser.add_argument('--num_gpus', type=int, default=os.environ['SM_NUM_GPUS'])
parser.add_argument('--output_dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR'))
```

In [1]:
%%writefile ./src/train.py

import os
import pandas as pd
import gluonts 
import numpy as np
import argparse
import json
import pathlib
from mxnet import gpu, cpu
from mxnet.context import num_gpus
import matplotlib.pyplot as plt

from gluonts.dataset.util import to_pandas
from gluonts.mx.distribution import DistributionOutput, StudentTOutput, NegativeBinomialOutput, GaussianOutput
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.model.deepar import DeepAREstimator
from gluonts.mx.trainer import Trainer
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions, backtest_metrics
from gluonts.model.predictor import Predictor
from gluonts.dataset.field_names import FieldName
from gluonts.dataset.common import ListDataset

def train(args):
    """Train a DeepAR model on ``target_train.csv``, evaluate it, and save the predictor.

    The CSV is read from ``args.data_dir``; its first column becomes the index and
    every remaining column is treated as one time series. The last
    ``args.pred_length`` steps of each series are held out of the training set and
    used for evaluation.

    Args:
        args: Parsed argument namespace providing ``epochs``, ``pred_length``,
            ``num_layers``, ``num_cells``, ``dropout_rate``, ``batch_size``,
            ``lr``, ``model_dir``, ``data_dir`` and ``num_gpus``.

    Returns:
        The predictor returned by ``DeepAREstimator.train``.

    Raises:
        ValueError: If the CSV contains no series, or ``pred_length`` is not in
            the range ``1 .. num_steps - 1`` (the training slice would be empty).
    """
    pred_length = args.pred_length
    device = "gpu" if args.num_gpus > 0 else "cpu"
    FREQ = 'D'

    # Get training data
    target_df = pd.read_csv(os.path.join(args.data_dir, 'target_train.csv'))
    target_df.set_index(target_df.columns[0], inplace=True)
    target = target_df.values
    num_steps, num_series = target_df.shape

    # Fail early with a clear message: slicing with target[:-pred_length] would
    # otherwise silently produce empty training series.
    if num_series == 0:
        raise ValueError("target_train.csv does not contain any time series columns")
    if not 0 < pred_length < num_steps:
        raise ValueError(
            f"pred_length must be between 1 and {num_steps - 1}, got {pred_length}"
        )
    start_dt = target_df.index[0]

    # Prepare GluonTS Dataset
    # Training series drop the last `pred_length` steps; test series keep everything.
    train_end = num_steps - pred_length
    train_ds = ListDataset(
        [{FieldName.TARGET: target[:train_end, i], FieldName.START: start_dt}
         for i in range(num_series)],
        freq=FREQ,
    )
    test_ds = ListDataset(
        [{FieldName.TARGET: target[:, i], FieldName.START: start_dt}
         for i in range(num_series)],
        freq=FREQ,
    )

    # Define Estimator
    trainer = Trainer(
        ctx=device,
        epochs=args.epochs,
        learning_rate=args.lr,
        batch_size=args.batch_size,
    )
    deepar_estimator = DeepAREstimator(
        freq=FREQ,
        prediction_length=pred_length,
        num_cells=args.num_cells,
        dropout_rate=args.dropout_rate,
        num_layers=args.num_layers,
        distr_output=StudentTOutput(),
        trainer=trainer,
    )

    # Train the model
    deepar_predictor = deepar_estimator.train(train_ds)

    # Evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_ds, deepar_predictor, num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test_ds))

    # Print only the headline metrics as JSON so they show up in the training log.
    metrics = ['RMSE', 'MAPE', 'wQuantileLoss[0.1]', 'wQuantileLoss[0.5]', 'wQuantileLoss[0.9]', 'mean_wQuantileLoss']
    metrics_dic = {key: value for key, value in agg_metrics.items() if key in metrics}
    print(json.dumps(metrics_dic, indent=2))

    # Save the model
    # Create the target directory first so serialization also works when it does not exist yet.
    model_path = pathlib.Path(args.model_dir)
    model_path.mkdir(parents=True, exist_ok=True)
    deepar_predictor.serialize(model_path)
    return deepar_predictor


def parse_args():
    """Parse training hyperparameters and SageMaker container settings.

    Hyperparameters come from the command line. The container settings default
    to the ``SM_MODEL_DIR``, ``SM_CHANNEL_TRAINING``, ``SM_NUM_GPUS`` and
    ``SM_OUTPUT_DATA_DIR`` environment variables.

    Returns:
        argparse.Namespace: The parsed arguments.

    Raises:
        KeyError: If ``SM_MODEL_DIR``, ``SM_CHANNEL_TRAINING`` or ``SM_NUM_GPUS``
            is not set in the environment (``SM_OUTPUT_DATA_DIR`` is optional and
            falls back to ``None``).
    """
    parser = argparse.ArgumentParser()

    # Hyperparameter Setting
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--pred_length', type=int, default=21)
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--num_cells', type=int, default=30)
    parser.add_argument('--dropout_rate', type=float, default=0.1)
    # A batch size is a count of samples, so parse it as an integer (was float).
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.001)

    # SageMaker Container Environment
    parser.add_argument('--model_dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--data_dir', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
    # The environment value is a string; argparse applies `type` to string defaults.
    parser.add_argument('--num_gpus', type=int, default=os.environ['SM_NUM_GPUS'])
    parser.add_argument('--output_dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR'))

    return parser.parse_args()

if __name__ == '__main__':
    # Script entry point: parse the arguments, then run training with them.
    train(parse_args())

Overwriting ./src/train.py


<br>

## 2. Training
---

스크립트가 준비되었다면 SageMaker 훈련을 수행하는 법은 매우 간단합니다. SageMaker Python SDK 활용 시, Estimator 인스턴스를 생성하고 해당 인스턴스의 `fit()` 메서드를 호출하는 것이 전부입니다. 좀 더 자세히 기술해 보면 아래와 같습니다.

#### 1) Estimator 인스턴스 생성
훈련 컨테이너에 필요한 설정들을 지정합니다. 본 핸즈온에서는 훈련 스크립트 파일이 포함된 경로인 소스 경로(source_dir)와 훈련 스크립트 Python 파일만 엔트리포인트(entry_point)로 지정해 주면 됩니다.

#### 2) `fit()` 메서드 호출
`estimator.fit(YOUR_TRAINING_DATA_URI)` 메서드를 호출하면, 훈련에 필요한 인스턴스를 시작하고 컨테이너 환경을 시작합니다. 필수 인자값은 훈련 데이터가 존재하는 S3 경로(`s3://`)이며, 로컬 모드로 훈련 시에는 S3 경로와 로컬 경로(`file://`)를 모두 지정할 수 있습니다.

인자값 중 wait은 디폴트 값으로 `wait=True`이며, 모든 훈련 작업이 완료될 때까지 코드 셀이 freezing됩니다. 만약 다른 코드 셀을 실행하거나, 다른 훈련 job을 시작하고 싶다면 `wait=False`로 설정하여 Asynchronous 모드로 변경하면 됩니다.

**SageMaker 훈련이 끝나면 컨테이너 환경과 훈련 인스턴스는 자동으로 삭제됩니다.** 이 때, SageMaker는 자동으로 `SM_MODEL_DIR` 경로에 저장된 최종 모델 아티팩트를 `model.tar.gz`로 압축하여 훈련 컨테이너 환경에서 S3 bucket으로 저장합니다. 당연히, S3 bucket에 저장된 모델 아티팩트를 다운로드받아 로컬 상에서 곧바로 테스트할 수 있습니다.

In [2]:
import os
import boto3
import sagemaker
from sagemaker.mxnet import MXNet

# Build a single SageMaker session on top of one boto3 session.
boto_session = boto3.Session()
sagemaker_session = sagemaker.Session(boto_session=boto_session)
# Execution role that is handed to the estimators as `role`.
role = sagemaker.get_execution_role()
# Reuse the session created above instead of constructing a second sagemaker.Session().
bucket = sagemaker_session.default_bucket()

### Upload data to Amazon S3

Amazon SageMaker로 모델 훈련을 실행하기 위해, 데이터를 S3에 업로드합니다. 참고로, 로컬 모드에서 테스트 시에는 S3에 업로드할 필요 없이 로컬 상에서도 훈련이 가능합니다.

In [3]:
# Key prefix under which the training CSV is stored in the bucket.
prefix = 'timeseries-hol/store-item-demand/train'
# Reuse the existing boto3 session rather than creating another one.
s3_bucket = boto_session.resource('s3').Bucket(bucket)

# S3 object keys always use '/' as the separator, so build the key explicitly
# instead of with os.path.join, which follows the local OS path convention.
s3_bucket.Object(f'{prefix}/target_train.csv').upload_file('data/target_train.csv')

In [4]:
# Local-mode estimator: instance_type='local' runs the training container on
# this machine, with a short 2-epoch run to check that train.py works.
estimator = MXNet(
    entry_point='train.py',
    source_dir='src',
    role=role,
    instance_count=1,
    instance_type='local',
    framework_version='1.6.0',
    py_version='py3',
    hyperparameters={'epochs': 2, 'lr': 0.001},
)

로컬 파일 시스템에서 직접 훈련을 수행해 보겠습니다.

In [5]:
# Point the training channel at the CSV on the local file system (file:// URI),
# so no S3 access is needed for this run.
local_file = 'file://{}/data/target_train.csv'.format(os.getcwd())
estimator.fit(inputs=local_file)

Creating sjb3nctd2j-algo-1-31eob ... 
Creating sjb3nctd2j-algo-1-31eob ... done
Attaching to sjb3nctd2j-algo-1-31eob
[36msjb3nctd2j-algo-1-31eob |[0m 2021-04-07 05:16:06,613 sagemaker-training-toolkit INFO     Imported framework sagemaker_mxnet_container.training
[36msjb3nctd2j-algo-1-31eob |[0m 2021-04-07 05:16:06,615 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36msjb3nctd2j-algo-1-31eob |[0m 2021-04-07 05:16:06,629 sagemaker_mxnet_container.training INFO     MXNet training environment: {'SM_HOSTS': '["algo-1-31eob"]', 'SM_NETWORK_INTERFACE_NAME': 'eth0', 'SM_HPS': '{"epochs":2,"lr":0.001}', 'SM_USER_ENTRY_POINT': 'train.py', 'SM_FRAMEWORK_PARAMS': '{}', 'SM_RESOURCE_CONFIG': '{"current_host":"algo-1-31eob","hosts":["algo-1-31eob"]}', 'SM_INPUT_DATA_CONFIG': '{"training":{"TrainingInputMode":"File"}}', 'SM_OUTPUT_DATA_DIR': '/opt/ml/output/data', 'SM_CHANNELS': '["training"]', 'SM_CURRENT_HOST': 'algo-1-31eob', 'SM_MODULE_NAME': 'train', '

[36msjb3nctd2j-algo-1-31eob |[0m [K     |                                | 10kB 25.1MB/s eta 0:00:01[K     |▏                               | 20kB 32.2MB/s eta 0:00:01[K     |▏                               | 30kB 37.9MB/s eta 0:00:01[K     |▎                               | 40kB 40.7MB/s eta 0:00:01[K     |▎                               | 51kB 33.2MB/s eta 0:00:01[K     |▍                               | 61kB 35.9MB/s eta 0:00:01[K     |▍                               | 71kB 36.4MB/s eta 0:00:01[K     |▌                               | 81kB 36.8MB/s eta 0:00:01[K     |▌                               | 92kB 38.0MB/s eta 0:00:01[K     |▋                               | 102kB 39.3MB/s eta 0:00:01[K     |▊                               | 112kB 39.3MB/s eta 0:00:01[K     |▊                               | 122kB 39.3MB/s eta 0:00:01[K     |▉                               | 133kB 39.3MB/s eta 0:00:01[K     |▉                               | 143kB 39.3MB/s eta 0

[36msjb3nctd2j-algo-1-31eob |[0m [?25hInstalling collected packages: pandas, toolz, dataclasses, pydantic, ujson, hijri-converter, korean-lunar-calendar, pymeeus, convertdate, holidays, gluonts
[36msjb3nctd2j-algo-1-31eob |[0m   Found existing installation: pandas 0.25.1
[36msjb3nctd2j-algo-1-31eob |[0m     Uninstalling pandas-0.25.1:
[36msjb3nctd2j-algo-1-31eob |[0m       Successfully uninstalled pandas-0.25.1
[36msjb3nctd2j-algo-1-31eob |[0m     Running setup.py install for ujson ... [?25ldone
[36msjb3nctd2j-algo-1-31eob |[0m [?25h    Running setup.py install for pymeeus ... [?25ldone
[36msjb3nctd2j-algo-1-31eob |[0m [?25hSuccessfully installed convertdate-2.3.2 dataclasses-0.8 gluonts-0.6.7 hijri-converter-2.1.1 holidays-0.11.1 korean-lunar-calendar-0.2.1 pandas-1.1.5 pydantic-1.6.1 pymeeus-0.5.11 toolz-0.11.1 ujson-1.35
[36msjb3nctd2j-algo-1-31eob |[0m You should consider upgrading via the 'pip install --upgrade pip' command.[0m
[36msjb3nctd2j-algo-1-31eob |

100% 50/50 [00:02<00:00, 17.44it/s, epoch=2/2, avg_epoch_loss=2.86]
[36msjb3nctd2j-algo-1-31eob |[0m INFO:gluonts.trainer:Epoch[1] Elapsed time 2.867 seconds
[36msjb3nctd2j-algo-1-31eob |[0m INFO:gluonts.trainer:Epoch[1] Evaluation metric 'epoch_loss'=2.856127
[36msjb3nctd2j-algo-1-31eob |[0m INFO:root:Computing averaged parameters.
[36msjb3nctd2j-algo-1-31eob |[0m INFO:root:Loading averaged parameters.
[36msjb3nctd2j-algo-1-31eob |[0m INFO:gluonts.trainer:End model training
Running evaluation: 100% 50/50 [00:00<00:00, 1324.56it/s]
[36msjb3nctd2j-algo-1-31eob |[0m {
[36msjb3nctd2j-algo-1-31eob |[0m   "MAPE": 0.16101935908990683,
[36msjb3nctd2j-algo-1-31eob |[0m   "RMSE": 7.968169248759345,
[36msjb3nctd2j-algo-1-31eob |[0m   "wQuantileLoss[0.1]": 0.12092247092441245,
[36msjb3nctd2j-algo-1-31eob |[0m   "wQuantileLoss[0.5]": 0.15674244992428232,
[36msjb3nctd2j-algo-1-31eob |[0m   "wQuantileLoss[0.9]": 0.05020846529797195,
[36msjb3nctd2j-algo-1-31eob |[0m   "mean_wQ

물론 S3 경로에서도 로컬 모드 훈련이 가능합니다. 

In [6]:
# Same local-mode estimator, but this time the training channel is read from
# the S3 prefix the CSV was uploaded to.
s3_input = sagemaker.inputs.TrainingInput(s3_data=f's3://{bucket}/{prefix}')
estimator.fit(inputs=s3_input)

Creating uwlnf21it3-algo-1-tpi2f ... 
Creating uwlnf21it3-algo-1-tpi2f ... done
Attaching to uwlnf21it3-algo-1-tpi2f
[36muwlnf21it3-algo-1-tpi2f |[0m 2021-04-07 05:16:32,575 sagemaker-training-toolkit INFO     Imported framework sagemaker_mxnet_container.training
[36muwlnf21it3-algo-1-tpi2f |[0m 2021-04-07 05:16:32,578 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36muwlnf21it3-algo-1-tpi2f |[0m 2021-04-07 05:16:32,593 sagemaker_mxnet_container.training INFO     MXNet training environment: {'SM_HOSTS': '["algo-1-tpi2f"]', 'SM_NETWORK_INTERFACE_NAME': 'eth0', 'SM_HPS': '{"epochs":2,"lr":0.001}', 'SM_USER_ENTRY_POINT': 'train.py', 'SM_FRAMEWORK_PARAMS': '{}', 'SM_RESOURCE_CONFIG': '{"current_host":"algo-1-tpi2f","hosts":["algo-1-tpi2f"]}', 'SM_INPUT_DATA_CONFIG': '{"training":{"TrainingInputMode":"File"}}', 'SM_OUTPUT_DATA_DIR': '/opt/ml/output/data', 'SM_CHANNELS': '["training"]', 'SM_CURRENT_HOST': 'algo-1-tpi2f', 'SM_MODULE_NAME': 'train', '

[36muwlnf21it3-algo-1-tpi2f |[0m [K     |                                | 10kB 24.7MB/s eta 0:00:01[K     |▏                               | 20kB 30.1MB/s eta 0:00:01[K     |▏                               | 30kB 34.4MB/s eta 0:00:01[K     |▎                               | 40kB 32.7MB/s eta 0:00:01[K     |▎                               | 51kB 30.0MB/s eta 0:00:01[K     |▍                               | 61kB 32.0MB/s eta 0:00:01[K     |▍                               | 71kB 33.3MB/s eta 0:00:01[K     |▌                               | 81kB 31.5MB/s eta 0:00:01[K     |▌                               | 92kB 32.4MB/s eta 0:00:01[K     |▋                               | 102kB 32.1MB/s eta 0:00:01[K     |▊                               | 112kB 32.1MB/s eta 0:00:01[K     |▊                               | 122kB 32.1MB/s eta 0:00:01[K     |▉                               | 133kB 32.1MB/s eta 0:00:01[K     |▉                               | 143kB 32.1MB/s eta 0

[36muwlnf21it3-algo-1-tpi2f |[0m [?25hInstalling collected packages: pandas, dataclasses, pydantic, ujson, pymeeus, convertdate, korean-lunar-calendar, hijri-converter, holidays, toolz, gluonts
[36muwlnf21it3-algo-1-tpi2f |[0m   Found existing installation: pandas 0.25.1
[36muwlnf21it3-algo-1-tpi2f |[0m     Uninstalling pandas-0.25.1:
[36muwlnf21it3-algo-1-tpi2f |[0m       Successfully uninstalled pandas-0.25.1
[36muwlnf21it3-algo-1-tpi2f |[0m     Running setup.py install for ujson ... [?25ldone
[36muwlnf21it3-algo-1-tpi2f |[0m [?25h    Running setup.py install for pymeeus ... [?25ldone
[36muwlnf21it3-algo-1-tpi2f |[0m [?25hSuccessfully installed convertdate-2.3.2 dataclasses-0.8 gluonts-0.6.7 hijri-converter-2.1.1 holidays-0.11.1 korean-lunar-calendar-0.2.1 pandas-1.1.5 pydantic-1.6.1 pymeeus-0.5.11 toolz-0.11.1 ujson-1.35
[36muwlnf21it3-algo-1-tpi2f |[0m You should consider upgrading via the 'pip install --upgrade pip' command.[0m
[36muwlnf21it3-algo-1-tpi2f |

100% 50/50 [00:02<00:00, 17.44it/s, epoch=2/2, avg_epoch_loss=2.91]
[36muwlnf21it3-algo-1-tpi2f |[0m INFO:gluonts.trainer:Epoch[1] Elapsed time 2.867 seconds
[36muwlnf21it3-algo-1-tpi2f |[0m INFO:gluonts.trainer:Epoch[1] Evaluation metric 'epoch_loss'=2.911405
[36muwlnf21it3-algo-1-tpi2f |[0m INFO:root:Computing averaged parameters.
[36muwlnf21it3-algo-1-tpi2f |[0m INFO:root:Loading averaged parameters.
[36muwlnf21it3-algo-1-tpi2f |[0m INFO:gluonts.trainer:End model training
Running evaluation: 100% 50/50 [00:00<00:00, 1367.96it/s]
[36muwlnf21it3-algo-1-tpi2f |[0m {
[36muwlnf21it3-algo-1-tpi2f |[0m   "MAPE": 0.13160886222192078,
[36muwlnf21it3-algo-1-tpi2f |[0m   "RMSE": 6.71450816596648,
[36muwlnf21it3-algo-1-tpi2f |[0m   "wQuantileLoss[0.1]": 0.07630109874339276,
[36muwlnf21it3-algo-1-tpi2f |[0m   "wQuantileLoss[0.5]": 0.12810226028231544,
[36muwlnf21it3-algo-1-tpi2f |[0m   "wQuantileLoss[0.9]": 0.04572316316129437,
[36muwlnf21it3-algo-1-tpi2f |[0m   "mean_wQu

### SageMaker Hosted Training

훈련 코드가 로컬에서 잘 작동하므로, 이제 SageMaker에서 관리하는 훈련 인스턴스를 사용하여 훈련을 수행하겠습니다. 로컬 모드 훈련과 달리 호스팅 훈련은
노트북 인스턴스 대신에 SageMaker에서 관리하는 별도의 클러스터에서 수행합니다. 본 핸즈온의 데이터셋 사이즈가 작기 때문에 체감이 되지 않겠지만, 대규모 데이터 및 복잡한 모델에 대한 분산 훈련은 SageMaker 호스팅 훈련 방법을 사용하는 것을 권장합니다.

In [7]:
# Hosted-training estimator: same script and framework settings as the local
# run, but on an ml.c5.xlarge training instance and for 15 epochs.
estimator = MXNet(
    entry_point='train.py',
    source_dir='src',
    role=role,
    instance_count=1,
    instance_type='ml.c5.xlarge',
    framework_version='1.6.0',
    py_version='py3',
    hyperparameters={'epochs': 15, 'lr': 0.001},
)

In [8]:
# wait=True is the default, spelled out here: the cell blocks until the
# training job finishes. Use wait=False to return immediately instead.
estimator.fit(inputs=s3_input, wait=True)

2021-04-07 05:16:57 Starting - Starting the training job...
2021-04-07 05:17:20 Starting - Launching requested ML instancesProfilerReport-1617772617: InProgress
......
2021-04-07 05:18:20 Starting - Preparing the instances for training......
2021-04-07 05:19:20 Downloading - Downloading input data
2021-04-07 05:19:20 Training - Downloading the training image...
2021-04-07 05:19:44 Training - Training image download completed. Training in progress.[34m2021-04-07 05:19:44,322 sagemaker-training-toolkit INFO     Imported framework sagemaker_mxnet_container.training[0m
[34m2021-04-07 05:19:44,325 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-04-07 05:19:44,336 sagemaker_mxnet_container.training INFO     MXNet training environment: {'SM_HOSTS': '["algo-1"]', 'SM_NETWORK_INTERFACE_NAME': 'eth0', 'SM_HPS': '{"epochs":15,"lr":0.001}', 'SM_USER_ENTRY_POINT': 'train.py', 'SM_FRAMEWORK_PARAMS': '{}', 'SM_RESOURCE_CONFIG': '{"current_host":"alg

[34m#015  0%|          | 0/50 [00:00<?, ?it/s]#015100%|██████████| 50/50 [00:03<00:00, 15.65it/s, epoch=3/15, avg_epoch_loss=2.67][0m
[34mINFO:gluonts.trainer:Epoch[2] Elapsed time 3.195 seconds[0m
[34mINFO:gluonts.trainer:Epoch[2] Evaluation metric 'epoch_loss'=2.673008[0m
[34mINFO:gluonts.trainer:Epoch[3] Learning rate is 0.001[0m
[34m#015  0%|          | 0/50 [00:00<?, ?it/s]#015100%|██████████| 50/50 [00:02<00:00, 19.79it/s, epoch=4/15, avg_epoch_loss=2.58][0m
[34mINFO:gluonts.trainer:Epoch[3] Elapsed time 2.527 seconds[0m
[34mINFO:gluonts.trainer:Epoch[3] Evaluation metric 'epoch_loss'=2.578444[0m
[34mINFO:gluonts.trainer:Epoch[4] Learning rate is 0.001[0m
[34m#015  0%|          | 0/50 [00:00<?, ?it/s]#015100%|██████████| 50/50 [00:02<00:00, 18.82it/s, epoch=5/15, avg_epoch_loss=2.54][0m
[34mINFO:gluonts.trainer:Epoch[4] Elapsed time 2.658 seconds[0m
[34mINFO:gluonts.trainer:Epoch[4] Evaluation metric 'epoch_loss'=2.544582[0m
[34mINFO:gluonts.trainer:Epoch[5

<br>

## 3. Getting Model Artifacts
---

훈련이 완료된 모델 아티팩트를 로컬(노트북 인스턴스 or 온프레미스)로 복사합니다. 훈련 완료 시 `SM_MODEL_DIR`에 있는 파일들이
`model.tar.gz`로 자동으로 압축되며, 압축을 해제하여 로컬 상에서도 추론을 수행할 수 있습니다.

In [9]:
local_model_dir = './model'
!rm -rf $local_model_dir

In [10]:
import json, os

s3_model_dir = estimator.model_data.replace('model.tar.gz', '')
print(s3_model_dir)
!aws s3 ls {s3_model_dir}

if not os.path.exists(local_model_dir):
    os.makedirs(local_model_dir)

!aws s3 cp {s3_model_dir}model.tar.gz {local_model_dir}/model.tar.gz
!tar -xzf {local_model_dir}/model.tar.gz -C {local_model_dir}

s3://sagemaker-us-east-1-143656149352/mxnet-training-2021-04-07-05-16-57-142/output/
2021-04-07 05:20:44      60053 model.tar.gz
download: s3://sagemaker-us-east-1-143656149352/mxnet-training-2021-04-07-05-16-57-142/output/model.tar.gz to model/model.tar.gz


다음 모듈에서 활용할 변수들을 저장합니다.

In [11]:
# Persist these variables with IPython's %store magic so the next module's
# notebook can use them.
%store s3_model_dir
%store prefix

Stored 's3_model_dir' (str)
Stored 'prefix' (str)
