In [15]:
import os 
import sys

'''
Create directory of dataset:
- efs_dir: mount point of efs
- dataset_dir: name of a directory to store dataset
'''
efs_dir = "/home/ec2-user/SageMaker/efs"
dataset_dir = "food101"
if not os.path.exists(efs_dir):
    print("Mount EFS on notebook instance. Food-101 dataset is very large.")
    sys.exit()
elif not os.path.exists(os.path.join(efs_dir, dataset_dir)):
    print("Creating dataset directory {}".format(os.path.join(efs_dir, dataset_dir)))
    os.mkdir(os.path.join(efs_dir, dataset_dir))
    
'''
Download dataset (This takes 20-30 minutes.)
'''
import urllib.request
url = 'http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz'
urllib.request.urlretrieve(url, os.path.join(efs_dir, dataset_dir, 'food-101.tar.gz'))

('/home/ec2-user/SageMaker/efs/food101/food-101.tar.gz',
 <http.client.HTTPMessage at 0x7f5ad412d128>)

In [None]:
import tarfile
file_name =  os.path.join(efs_dir, dataset_dir, 'food-101.tar.gz')
tar = tarfile.open(file_name, "r:gz")
tar.extractall(path=os.path.join(efs_dir, dataset_dir))
tar.close()

In [5]:
food_name = 'ramen'
edge_dir = 'ramen_edge'
image_path = os.path.join(efs_dir, dataset_dir, 'food-101', 'images', food_name)
edge_path = os.path.join(efs_dir, dataset_dir,'food-101', 'images', edge_dir)

from PIL import Image, ImageFilter
if not os.path.exists(edge_path):
    print("Create directory to store edge-images")
    os.mkdir(edge_path)

count = 0
for f in os.listdir(image_path):
    print('\r extracting edge from images {}/{}'.format(str(count+1),len(os.listdir(image_path))), end="")
    image = Image.open(os.path.join(image_path, f))
    image = image.filter(ImageFilter.FIND_EDGES)
    image = image.filter(ImageFilter.CONTOUR)
    image = image.convert("L")           
    image = image.point(lambda x: 0 if x < 100 else x)
    image.save(os.path.join(edge_path, f)) 
    count+=1

 extracting edge from images 1000/1000

In [6]:
import sagemaker
from sagemaker.mxnet import MXNet

sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

input_img = sagemaker_session.upload_data(path=edge_path, key_prefix='food101/edge_ramen')
output_img  = sagemaker_session.upload_data(path=image_path, key_prefix='food101/image_ramen')

In [7]:
!wget https://raw.githubusercontent.com/awslabs/amazon-sagemaker-examples/master/sagemaker-python-sdk/mxnet_gluon_cifar10/setup.sh
!sh ./setup.sh

--2018-09-16 01:18:04--  https://raw.githubusercontent.com/awslabs/amazon-sagemaker-examples/master/sagemaker-python-sdk/mxnet_gluon_cifar10/setup.sh
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.72.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.72.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2445 (2.4K) [text/plain]
Saving to: ‘setup.sh.1’


2018-09-16 01:18:04 (50.4 MB/s) - ‘setup.sh.1’ saved [2445/2445]

SageMaker instance route table setup is ok. We are good to go.
SageMaker instance routing for Docker is ok. We are good to go!


In [14]:
mxnet_estimator = MXNet('pix2pix.py',
                          role=role, 
                          train_instance_count=1,
                          train_instance_type='local',
                          hyperparameters={'batch_size': 32,
                                           'epochs': 100,
                                           'learning_rate': 0.0002,
                                           'beta1': 0.5,
                                           'lambda1': 100})

In [15]:
mxnet_estimator.fit({'feature': input_img, 'label': output_img})

INFO:sagemaker:Creating training-job with name: sagemaker-mxnet-2018-09-16-01-22-37-707


Pulling algo-1-74V7D (520713654638.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-mxnet:1.2-cpu-py2)...
1.2-cpu-py2: Pulling from sagemaker-mxnet
Digest: sha256:9292494350b8d76e0771516fef512efad1ac839033fd5a43ac0585069f9252af
Status: Downloaded newer image for 520713654638.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-mxnet:1.2-cpu-py2
Creating tmpmgi60gnn_algo-1-74V7D_1 ... 
[1BAttaching to tmpmgi60gnn_algo-1-74V7D_12mdone[0m
[36malgo-1-74V7D_1  |[0m 2018-09-16 01:25:25,097 INFO - root - running container entrypoint
[36malgo-1-74V7D_1  |[0m 2018-09-16 01:25:25,098 INFO - root - starting train task
[36malgo-1-74V7D_1  |[0m 2018-09-16 01:25:25,104 INFO - container_support.training - Training starting
[36malgo-1-74V7D_1  |[0m 2018-09-16 01:25:25,583 INFO - mxnet_container.train - MXNetTrainingEnvironment: {'enable_cloudwatch_metrics': False, 'available_gpus': 0, 'channels': {u'feature': {u'ContentType': u'application/octet-stream'}, u'label': {u'ContentType': u'application/oct

RuntimeError: Failed to run: ['docker-compose', '-f', '/tmp/tmpmgi60gnn/docker-compose.yaml', 'up', '--build', '--abort-on-container-exit'], Process exited with code: 1