# Run Blueoil on Amazon SageMaker
## Docker build and push (to Amazon ECR)
https://github.com/hadusam/blueoil-sagemaker

In [None]:
# Run the helper script that builds the training image and pushes it to Amazon ECR.
# NOTE(review): the two arguments are presumably <ECR repository name> and
# <base Blueoil image> — confirm against docker_push_ecr.sh.
!bash ./docker_push_ecr.sh blueoil-sagemaker blueoil/blueoil:v0.22.0

## Preparing data (upload a face-image subset of Open Images V4 to Amazon S3)
### Create sagemaker session

In [None]:
import sagemaker
# Shared SageMaker session; the upload cells below call sess.upload_data() through it.
sess = sagemaker.Session()

In [None]:
import os
import shutil

def upload_data(sess, path, key_prefix='data', compress=False):
    """Upload a local file or directory to S3 through ``sess.upload_data``.

    Args:
        sess: Object exposing ``upload_data(path=..., key_prefix=...)``
            (the notebook passes a ``sagemaker.Session``).
        path: Local file or directory. When ``compress`` is true it is
            resolved relative to the current working directory.
        key_prefix: Forwarded unchanged to ``sess.upload_data``.
        compress: When true, ``path`` is first packed into a gzip-compressed
            tar archive named ``<path>.tar.gz`` and the archive is uploaded
            instead of ``path`` itself.

    Returns:
        Whatever ``sess.upload_data`` returns (the notebook uses it as the
        S3 location of the uploaded object).
    """
    if compress:
        # Normalise before archiving: with a trailing separator such as
        # 'dataset/' the archive name would become 'dataset/.tar.gz', i.e. a
        # hidden file written inside the very directory being archived.
        base = os.path.normpath(path)
        path = shutil.make_archive(base, 'gztar', root_dir='.', base_dir=base)
    return sess.upload_data(path=path, key_prefix=key_prefix)

### Download a face-image subset of the Open Images V4 dataset

In [None]:
# Fetch the dataset tarball into the working directory and unpack it.
# NOTE(review): the upload cell references ./openimages_face, so the archive is
# presumably expected to unpack to that directory — confirm.
!curl -O https://s3-ap-northeast-1.amazonaws.com/leapmind-public-storage/datasets/openimages_face.tgz
!tar xf openimages_face.tgz

### Upload dataset

In [None]:
%%time
# Pack ./openimages_face into a .tar.gz and upload it under the default 'data'
# key prefix; train_data is later passed to estimator.fit() as the 'dataset' channel.
train_data = upload_data(sess, 'openimages_face', compress=True)

## Preparing config (upload to Amazon S3)

### Create config file
https://docs.blueoil.org/tutorial/image_det.html

Create `openimages_face_sample.py` by running
```
blueoil init -o openimages_face_sample.py
```
with the following dataset settings:
```
dataset:
  format: OpenImagesV4
  train dataset path: /opt/ml/input/data/dataset/openimages_face/
  validation dataset path: /opt/ml/input/data/dataset/openimages_face/
```

In [None]:
%%time
# Upload the config file as-is (no archive) under the 'config' key prefix;
# config_data is later passed to estimator.fit() as the 'config' channel.
config_data = upload_data(sess, 'openimages_face_sample.py', key_prefix='config', compress=False)

In [None]:
# Recovery shortcut: if the kernel was restarted after the uploads finished,
# uncomment the two assignments below to rebuild the S3 locations instead of
# uploading again.
# NOTE(review): the keys assume upload_data() stored each file as
# <key_prefix>/<file name> in the session's default bucket — confirm.

# train_data = 's3://' + sagemaker.Session().default_bucket() + '/data/openimages_face.tar.gz'
# config_data = 's3://' + sagemaker.Session().default_bucket() + '/config/openimages_face_sample.py'
print(config_data)
print(train_data)

## Train

### On an Amazon SageMaker on-demand instance
#### Launch training instance

In [None]:
import boto3

# Name of the image repository; combined with the account and region below to
# form the image URI handed to the Estimator.
algorithm_name = 'blueoil-sagemaker'

# Resolve the region first so a missing configuration fails before any AWS call.
my_session = boto3.session.Session()
region = my_session.region_name
if region is None:
    # Without this check the URI would silently contain the literal 'None'
    # and only fail later, when training tries to pull the image.
    raise RuntimeError(
        'No AWS region is configured for this boto3 session; set '
        'AWS_DEFAULT_REGION or configure a default region before building '
        'the ECR image URI.'
    )

# The account id of the current credentials is part of the ECR registry host name.
client = boto3.client('sts')
account = client.get_caller_identity()['Account']

ecr_image = '{}.dkr.ecr.{}.amazonaws.com/{}:latest'.format(account, region, algorithm_name)

#### Run train and convert model

In [None]:
import sagemaker
from sagemaker.estimator import Estimator

# Instance type used for the single training instance requested below.
train_instance_type = 'ml.p2.xlarge'

# NOTE(review): image_name / train_instance_count / train_instance_type are the
# SageMaker Python SDK v1 keyword names; SDK v2 renamed them to image_uri /
# instance_count / instance_type — confirm which SDK version is installed.
estimator = Estimator(
    image_name=ecr_image,
    role=sagemaker.get_execution_role(),
    train_instance_count=1,
    train_instance_type=train_instance_type,
    # 'config' points at the file delivered through the 'config' input channel.
    hyperparameters=dict(
        config='/opt/ml/input/data/config/openimages_face_sample.py',
        experiment_id='objectdetection_face_sample',
    ),
)

In [None]:
# Start training. The channel names 'dataset' and 'config' match the
# /opt/ml/input/data/<channel>/ paths used in the config file and in the
# 'config' hyperparameter.
# NOTE(review): train_data is a .tar.gz, while the config expects an extracted
# openimages_face/ directory — presumably the container unpacks it; confirm.
estimator.fit({'dataset': train_data, 'config': config_data})

## Download converted model

In [None]:
# Copy the training output artifact (the S3 location in estimator.model_data)
# into the working directory and unpack it.
# NOTE(review): the tar step assumes the artifact's file name is model.tar.gz — confirm.
!aws s3 cp $estimator.model_data ./
!tar zxf model.tar.gz