# Run Blueoil on Amazon SageMaker
## Docker build and push (to Amazon ECR)

In [None]:
# Before building, please add these 2 lines to the Dockerfile:
# ENV USE_HOROVOD="True"
# ENV CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"

# NOTE(review): presumably builds the image and pushes it to ECR; the arguments
# look like the target repository name and the Blueoil base image - confirm
# against docker_push_ecr.sh.
!bash ./docker_push_ecr.sh blueoil-sagemaker-dist blueoil/blueoil:v0.22.0

## Preparing data (upload a face image subset of the OpenImagesV4 dataset to Amazon S3)
### Create sagemaker session

In [None]:
import sagemaker
# SageMaker session object; passed to upload_data() for the S3 uploads in this notebook.
sess = sagemaker.Session()

In [None]:
import os
import shutil

def upload_data(sess, path, key_prefix='data', compress=False):
    """Upload a local file or directory to S3 through a SageMaker session.

    Args:
        sess: Object exposing ``upload_data(path=..., key_prefix=...)``
            (in this notebook, a ``sagemaker.Session``).
        path: Local file or directory to upload.
        key_prefix: S3 key prefix passed through to ``sess.upload_data``.
        compress: When true, pack ``path`` (relative to the current
            directory) into ``<path>.tar.gz`` and upload that archive
            instead of the raw path.

    Returns:
        Whatever ``sess.upload_data`` returns for the uploaded path.
    """
    if compress:
        # Drop trailing separators and redundant components first: with a
        # name such as 'dataset/' make_archive would otherwise write
        # 'dataset/.tar.gz' inside the very directory it is archiving.
        path = os.path.normpath(path)
        path = shutil.make_archive(path, 'gztar', '.', path)
    return sess.upload_data(path=path, key_prefix=key_prefix)

### Download a face image subset of OpenimagesV4 dataset

In [None]:
# !curl -O https://s3-ap-northeast-1.amazonaws.com/leapmind-public-storage/datasets/openimages_classification.tgz
# !tar xf openimages_classification.tgz

### Upload dataset

In [None]:
%%time
# Uncomment to archive the local dataset directory and upload it to S3:
# train_data = upload_data(sess, 'openimagesv4_10', compress=True)

# ==== delete later ====
# NOTE(review): the active location below is a hard-coded bucket; replace it with your own.
# Single dataset channel with compression.
dataset = 's3://hariby-iad/dataset/openimagesv4_10.tar.gz'
# dataset = 'file:///home/ec2-user/SageMaker/dataset'
# dataset = 'file:///fsx/dataset'

# Alternative: train/validation in separate channels.
# hariby test
# train_data = 's3://hariby-iad/dataset/openimagesv4_10/train'
# validation_data = 's3://hariby-iad/dataset/openimagesv4_10/validation'
# # for local test
# train_data = 'file:///home/ec2-user/SageMaker/openimagesv4_10/train/'
# validation_data = 'file:///home/ec2-user/SageMaker/openimagesv4_10/validation/'

## Preparing config (upload to Amazon S3)

### Create config file
https://docs.blueoil.org/tutorial/image_det.html

create `openimages_objectdetection_sample.py` 

by 
```
blueoil init -o openimages_objectdetection_sample.py
```
with
```
dataset:
  format:                  OpenImagesV4
  train dataset path:      /opt/ml/input/data/dataset/train/
  validation dataset path: /opt/ml/input/data/dataset/validation/
```

In [None]:
%%time
# Upload the Blueoil config file as-is (no archive) under the 'config' key prefix.
config_data = upload_data(
    sess,
    'openimages_objectdetection_sample.py',
    key_prefix='config',
    compress=False,
)

In [None]:
# Show the data locations defined above (config first, then dataset).
#
# If the uploads were done in an earlier session, the locations can be rebuilt by hand, e.g.:
# train_data  = 's3://' + sagemaker.Session().default_bucket() + '/data/openimages_face.tar.gz'
# config_data = 's3://' + sagemaker.Session().default_bucket() + '/config/openimages_face_sample.yml'
print(config_data, dataset, sep='\n')
# print(train_data)
# print(validation_data)

## Train

### On Amazon SageMaker on-demand instance
#### Create session

In [None]:
import boto3

# Repository name used as the last path component of the image URI.
algorithm_name = 'blueoil-sagemaker-dist'

# Region configured for the default boto3 session.
my_session = boto3.session.Session()
region = my_session.region_name

# Account id of the caller, looked up through STS.
client = boto3.client('sts')
account = client.get_caller_identity()['Account']

# Full ECR image URI, using the 'latest' tag.
ecr_image = f'{account}.dkr.ecr.{region}.amazonaws.com/{algorithm_name}:latest'

#### Storage configuration

In [None]:
from sagemaker.inputs import FileSystemInput

# Specify the file system id (the type configured below is FSx for Lustre).
file_system_id = 'fs-xxxxxxxxxxxxxxxxx' 
print(f"FSx file-system-id: {file_system_id}")

# Specify the directory path for input data on the file system.
# You need to provide a normalized, absolute path below.
file_system_directory_path = '/xxxxxxxx/dataset'
print(f'FSx file-system data input path: {file_system_directory_path}')

# Specify the access mode of the mount of the directory associated with the file system.
# NOTE(review): this comment used to say the directory must be mounted 'ro' (read-only),
# but the value below is 'rw' - confirm which one is intended.
file_system_access_mode = 'rw'

# Specify your file system type
file_system_type = 'FSxLustre'

# Bundle the settings above into a single file system input object.
lustre = FileSystemInput(file_system_id=file_system_id,
                                    file_system_type=file_system_type,
                                    directory_path=file_system_directory_path,
                                    file_system_access_mode=file_system_access_mode)

#### Run training

In [None]:
import sagemaker
from sagemaker.estimator import Estimator

# Instance selection ('local_gpu' is kept as a commented-out alternative).
# train_instance_type = 'local_gpu'
train_instance_type = 'ml.p3.16xlarge'
train_instance_count = 1

# Experiment id, passed to the container as a hyperparameter.
blueoil_experiment_id = 'objectdetection_opanimagesv4_10_sample'

estimator = Estimator(
    # Container image and naming.
    image_name=ecr_image,
    base_job_name=f'blueoil-sagemaker-dist-hvd-{train_instance_count}nodes',
    role=sagemaker.get_execution_role(),
    # Compute resources and run-time limit.
    train_instance_type=train_instance_type,
    train_instance_count=train_instance_count,
    train_volume_size=256,
    train_max_run=5 * 24 * 60 * 60,
    # Networking (placeholders - fill in your own subnet and security group).
    subnets=['subnet-xxxxxxxx'],
    security_group_ids=['sg-xxxxxxxx'],
    # Values forwarded to the training container.
    hyperparameters={
        'config': '/opt/ml/input/data/config/openimages_objectdetection_sample.py',
        'experiment_id': blueoil_experiment_id,
    },
)

In [None]:
# Alternative input layouts (kept for reference):
# estimator.fit({'dataset': dataset, 'config': config_data})
# estimator.fit({'train': train_data, 'validation': validation_data, 'config': config_data})

# Start training with the 'dataset' channel backed by the Lustre file system input.
estimator.fit(
    inputs={
        'dataset': lustre,
        'config': config_data,
    },
)

## Convert
### On Amazon SageMaker on-demand instance

In [None]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput

# Instance type used for the conversion job.
convert_instance_type = 'ml.m5.xlarge'

# Script processor that runs `python3` inside the image referenced by ecr_image.
processor = ScriptProcessor(
    base_job_name="blueoil-convert",
    image_uri=ecr_image,
    command=['python3'],
    role=sagemaker.get_execution_role(),
    instance_type=convert_instance_type,
    instance_count=1,
)

In [None]:
# Location of the model artifact reported by the estimator.
trained_model = estimator.model_data
# Put the converted model under a 'converted' prefix next to the trained artifact.
# S3 URIs always use '/', so split on it explicitly instead of going through
# os.path, whose separator depends on the operating system.
converted_model = f"{trained_model.rsplit('/', 1)[0]}/converted"

In [None]:
# Run the conversion script as a processing job.
# The dataset input uses `dataset`: `train_data` is only assigned in
# commented-out code in this notebook, so referencing it here raised NameError.
processor.run(
    code='script/main.py',
    inputs=[
        ProcessingInput(source=dataset, destination='/opt/ml/processing/input/data/dataset'),
        ProcessingInput(source=estimator.model_data, destination='/opt/ml/processing/input/data/model'),
    ],
    outputs=[
        ProcessingOutput(source='/opt/ml/processing/output/converted', destination=converted_model),
    ],
    arguments=['convert', '--experiment_id', blueoil_experiment_id],
)

print(f"Converted models are saved to {converted_model}")

## Download converted model

In [None]:
# Copy everything under the converted-model S3 location into the current directory.
!aws s3 cp $converted_model ./ --recursive
# NOTE(review): assumes the download contains output.tar.gz - confirm the file name
# produced by the conversion job.
!tar zxvf output.tar.gz