# Run Blueoil on Amazon SageMaker
## Docker build and push (to Amazon ECR)
https://github.com/hadusam/blueoil-sagemaker

In [None]:
# Run the helper script docker_push_ecr.sh with the image name
# "blueoil-sagemaker". Per the section heading it is expected to build the
# Docker image and push it to Amazon ECR (script contents are not shown here).
!bash ./docker_push_ecr.sh blueoil-sagemaker

## Preparing data (upload CIFAR-10 to Amazon S3)
### Create sagemaker session

In [None]:
import sagemaker
# SageMaker session object; it is passed to upload_data() in the cells below.
sess = sagemaker.Session()

In [None]:
import os
import shutil

def upload_data(sess, path, key_prefix='data', compress=False):
    """Upload a local file or directory through ``sess`` and return the result.

    Args:
        sess: Object exposing ``upload_data(path=..., key_prefix=...)``
            (the notebook passes a ``sagemaker.Session``).
        path: Local path to upload. When ``compress`` is true it is used both
            as the archive base name and as the directory to archive, relative
            to the current working directory.
        key_prefix: Key prefix forwarded unchanged to ``sess.upload_data``.
        compress: When true, first pack ``path`` into a gzip-compressed tar
            archive (``<path>.tar.gz``) and upload the archive instead.

    Returns:
        Whatever ``sess.upload_data`` returns for the uploaded path.
    """
    local_path = path
    if compress:
        # Archive is rooted at the current directory so the archive keeps
        # ``path`` as its top-level entry.
        local_path = shutil.make_archive(
            base_name=path,
            format='gztar',
            root_dir='.',
            base_dir=path,
        )
    return sess.upload_data(path=local_path, key_prefix=key_prefix)

### Download CIFAR-10 dataset

In [None]:
# Download cifar.tgz into the current directory and unpack it here.
# The upload cell further down reads a local 'cifar' path, so the archive is
# presumably expected to extract to ./cifar — TODO confirm the archive layout.
!curl -O https://s3-ap-northeast-1.amazonaws.com/leapmind-public-storage/datasets/cifar.tgz
!tar xzf cifar.tgz

### Upload dataset

In [None]:
%%time
# Pack the local 'cifar' path into a tar.gz (compress=True) and upload it
# under the default 'data' key prefix; train_data holds the upload result.
train_data = upload_data(sess, 'cifar', compress=True)

## Preparing config (upload to Amazon S3)

### Create config YAML 
https://docs.blueoil.org/tutorial/image_cls.html

Create `cifar10_sample.yml` by running
```
./blueoil.sh init
```
with the following dataset settings:
```
dataset:
  format: Caltech101
  train_path: /opt/ml/input/dataset/cifar/train
  test_path: /opt/ml/input/dataset/cifar/test
```

In [None]:
%%time
# Upload the config YAML as-is (no archive) under the 'config' key prefix;
# config_data holds the upload result.
config_data = upload_data(sess, 'cifar10_sample.yml', key_prefix='config', compress=False)

In [None]:
# Retrieve data location
#
# If the upload cells were skipped, the locations can be set by hand instead
# (assumes the default bucket and the key prefixes used above — verify first):
# train_data  = 's3://' + sagemaker.Session().default_bucket() + '/data/cifar.tar.gz'
# config_data = 's3://' + sagemaker.Session().default_bucket() + '/config/cifar10_sample.yml'
print(config_data)
print(train_data)

## Train
### Set execution role for local notebook

In [None]:
# List IAM roles, keep the "Arn" lines that mention
# AmazonSageMaker-ExecutionRole, and write the quoted ARN value of each such
# line to the file ./exec_role (one line per match).
!aws iam list-roles | grep AmazonSageMaker-ExecutionRole | grep Arn | awk -F\" '{print $4}' > exec_role

In [None]:
import os
# Ask the SDK for the execution role first. If it raises ValueError (per the
# section heading, the case of a local notebook), fall back to the ARN written
# to the `exec_role` file by the previous cell.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    with open('exec_role', 'r') as role_file:
        # The file holds one ARN per matching role. Take the first non-empty
        # line rather than joining all lines, which would glue several ARNs
        # into a single invalid string when more than one role matches.
        role_arns = [line.strip() for line in role_file if line.strip()]
    # Keep the previous best-effort behaviour of an empty string when the
    # file contains no ARN at all.
    role = role_arns[0] if role_arns else ''
print(role)

### Case1: On your local GPU server

In [None]:
import sagemaker
from sagemaker.estimator import Estimator

algorithm_name = 'blueoil-sagemaker'
train_instance_type = 'local_gpu'

# Image reference for the locally built container: "<algorithm_name>:latest".
local_image = algorithm_name + ':latest'

# NOTE(review): this case passes 'config_file': 'config.yml', whereas the
# on-demand case later in this notebook passes 'config': 'cifar10_sample.yml'.
# Confirm which hyperparameter key / file name the container expects.
local_hyperparameters = {
    'config_file': 'config.yml',
    'experiment_id': 'cifar10_sample',
}

# Single-instance estimator using the 'local_gpu' instance type.
estimator = Estimator(
    image_name=local_image,
    role=role,
    train_instance_count=1,
    train_instance_type=train_instance_type,
    hyperparameters=local_hyperparameters,
)

In [None]:
# Point both input channels at the current working directory through
# file:// URIs so training reads the local files.
estimator.fit({'dataset': 'file://' + os.getcwd(), 'config': 'file://' + os.getcwd()})

### Case2: On Amazon SageMaker on-demand instance
#### Launch training instance

In [None]:
import boto3

algorithm_name = 'blueoil-sagemaker'

# Region name configured for a fresh boto3 session.
my_session = boto3.session.Session()
region = my_session.region_name

# Account ID of the current caller, obtained through STS.
client = boto3.client('sts')
account = client.get_caller_identity()['Account']

# Full image URI: <account>.dkr.ecr.<region>.amazonaws.com/<algorithm_name>:latest
ecr_image = '{}.dkr.ecr.{}.amazonaws.com/{}:latest'.format(
    account,
    region,
    algorithm_name,
)

#### Run train and convert model

In [None]:
import sagemaker
from sagemaker.estimator import Estimator

train_instance_type = 'ml.p2.xlarge'

# Hyperparameters handed to the training container: the config file name and
# the experiment id.
ondemand_hyperparameters = {
    'config': 'cifar10_sample.yml',
    'experiment_id': 'cifar10_sample',
}

# Single-instance estimator that uses the image URI built in the previous cell.
estimator = Estimator(
    image_name=ecr_image,
    role=role,
    train_instance_count=1,
    train_instance_type=train_instance_type,
    hyperparameters=ondemand_hyperparameters,
)

In [None]:
# Start training with the uploaded dataset and config as the 'dataset' and
# 'config' input channels (train_data / config_data come from the upload cells).
estimator.fit({'dataset': train_data, 'config': config_data})

## Download converted model

In [None]:
# Copy the artifact referenced by estimator.model_data into the current
# directory, then unpack it. The tar step assumes the downloaded file is named
# model.tar.gz — verify against the printed S3 path.
!aws s3 cp $estimator.model_data ./
!tar zxf model.tar.gz