In [15]:
import os 
import sys

'''
Create directory of dataset:
- efs_dir: mount point of efs
- dataset_dir: name of a directory to store dataset
'''
efs_dir = "/home/ec2-user/SageMaker/efs"
dataset_dir = "food101"
if not os.path.exists(efs_dir):
    print("Mount EFS on notebook instance. Food-101 dataset is very large.")
    sys.exit()
elif not os.path.exists(os.path.join(efs_dir, dataset_dir)):
    print("Creating dataset directory {}".format(os.path.join(efs_dir, dataset_dir)))
    os.mkdir(os.path.join(efs_dir, dataset_dir))
    
'''
Download dataset (This takes 20-30 minutes.)
'''
import urllib.request
url = 'http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz'
urllib.request.urlretrieve(url, os.path.join(efs_dir, dataset_dir, 'food-101.tar.gz'))

('/home/ec2-user/SageMaker/efs/food101/food-101.tar.gz',
 <http.client.HTTPMessage at 0x7f5ad412d128>)

In [None]:
import tarfile
file_name =  os.path.join(efs_dir, dataset_dir, 'food-101.tar.gz')
tar = tarfile.open(file_name, "r:gz")
tar.extractall(path=os.path.join(efs_dir, dataset_dir))
tar.close()

In [1]:
import os
efs_dir = "/home/ec2-user/SageMaker/efs"
dataset_dir = "food101"
food_name = 'ramen'
edge_dir = 'ramen_edge'
image_path = os.path.join(efs_dir, dataset_dir, 'food-101', 'images', food_name)
edge_path = os.path.join(efs_dir, dataset_dir,'food-101', 'images', edge_dir)

from PIL import Image, ImageFilter
if not os.path.exists(edge_path):
    print("Create directory to store edge-images")
    os.mkdir(edge_path)

count = 0
for f in os.listdir(image_path):
    print('\r extracting edge from images {}/{}'.format(str(count+1),len(os.listdir(image_path))), end="")
    image = Image.open(os.path.join(image_path, f))
    image = image.filter(ImageFilter.FIND_EDGES)
    image = image.filter(ImageFilter.CONTOUR)
    image = image.convert("L")           
    image = image.point(lambda x: 0 if x < 100 else x)
    image.save(os.path.join(edge_path, f)) 
    count+=1

 extracting edge from images 1000/1000

In [2]:
import sagemaker
from sagemaker.mxnet import MXNet

sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

input_img = sagemaker_session.upload_data(path=edge_path, key_prefix='food101/edge_ramen')
output_img  = sagemaker_session.upload_data(path=image_path, key_prefix='food101/image_ramen')

In [3]:
!wget https://raw.githubusercontent.com/awslabs/amazon-sagemaker-examples/master/sagemaker-python-sdk/mxnet_gluon_cifar10/setup.sh
!sh ./setup.sh

--2018-09-16 02:55:47--  https://raw.githubusercontent.com/awslabs/amazon-sagemaker-examples/master/sagemaker-python-sdk/mxnet_gluon_cifar10/setup.sh
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.88.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.88.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2445 (2.4K) [text/plain]
Saving to: ‘setup.sh.1’


2018-09-16 02:55:47 (46.0 MB/s) - ‘setup.sh.1’ saved [2445/2445]

Loaded plugins: dkms-build-requires, priorities, update-motd, upgrade-helper
Resolving Dependencies
--> Running transaction check
---> Package docker.x86_64 0:17.12.1ce-1.135.amzn1 will be erased
--> Finished Dependency Resolution

Dependencies Resolved

 Package     Arch        Version                       Repository          Size
Removing:
 docker      x86_64      17.12.1ce-1.135.amzn1         @amzn-updates      109 M

Transaction Summary
Remove  1 Package

Installed size: 109 M
Downloa

--> Finished Dependency Resolution

Dependencies Resolved

 Package                   Arch   Version        Repository                Size
Installing:
 nvidia-docker2            noarch 2.0.3-1.docker17.09.1.ce.amzn1
                                                 nvidia-docker            4.3 k
Installing for dependencies:
 libnvidia-container-tools x86_64 1.0.0-0.1.rc.2 libnvidia-container       34 k
 libnvidia-container1      x86_64 1.0.0-0.1.rc.2 libnvidia-container       80 k
 nvidia-container-runtime  x86_64 2.0.0-1.docker17.09.1.amzn1
                                                 nvidia-container-runtime 2.1 M
 nvidia-container-runtime-hook
                           x86_64 1.4.0-1.amzn1  nvidia-container-runtime 706 k

Transaction Summary
Install  1 Package (+4 Dependent packages)

Total download size: 2.9 M
Installed size: 8.2 M
Downloading packages:
(1/5): libnvidia-container-tools-1.0.0-0.1.rc.2.x86_64.rpm |  34 kB   00:00     
(2/5): nvidia-docker2-2.0.3-1.docker17.09.1.c

In [8]:
mxnet_estimator = MXNet('pix2pix.py',
                          role=role, 
                          train_instance_count=1,
                          train_instance_type='ml.p3.2xlarge',
                          hyperparameters={'batch_size': 32,
                                           'epochs': 100,
                                           'learning_rate': 0.0002,
                                           'beta1': 0.5,
                                           'lambda1': 100})

In [None]:
mxnet_estimator.fit({'feature': input_img, 'label': output_img})

INFO:sagemaker:Creating training-job with name: sagemaker-mxnet-2018-09-16-03-10-29-000


..........................
[31m2018-09-16 03:14:35,818 INFO - root - running container entrypoint[0m
[31m2018-09-16 03:14:35,818 INFO - root - starting train task[0m
[31m2018-09-16 03:14:35,838 INFO - container_support.training - Training starting[0m
[31m2018-09-16 03:14:36,814 INFO - mxnet_container.train - MXNetTrainingEnvironment: {'enable_cloudwatch_metrics': False, 'available_gpus': 1, 'channels': {u'feature': {u'TrainingInputMode': u'File', u'RecordWrapperType': u'None', u'S3DistributionType': u'FullyReplicated'}, u'label': {u'TrainingInputMode': u'File', u'RecordWrapperType': u'None', u'S3DistributionType': u'FullyReplicated'}}, '_ps_verbose': 0, 'resource_config': {u'hosts': [u'algo-1'], u'network_interface_name': u'ethwe', u'current_host': u'algo-1'}, 'user_script_name': u'pix2pix.py', 'input_config_dir': '/opt/ml/input/config', 'channel_dirs': {u'feature': u'/opt/ml/input/data/feature', u'label': u'/opt/ml/input/data/label'}, 'code_dir': '/opt/ml/code', 'output_data_di

[31m2018-09-16 03:15:28,560:307:INFO:discriminator loss = 0.517399, generator loss = 38.826809, binary training acc = 0.823576 at iter 0 epoch 3[0m
[31m2018-09-16 03:15:28,560 INFO - LoggingTest - discriminator loss = 0.517399, generator loss = 38.826809, binary training acc = 0.823576 at iter 0 epoch 3[0m
[31m2018-09-16 03:15:31,800:304:INFO:speed: 93.118293177 samples/s[0m
[31m2018-09-16 03:15:31,800 INFO - LoggingTest - speed: 93.118293177 samples/s[0m
[31m2018-09-16 03:15:32,009:307:INFO:discriminator loss = 0.144635, generator loss = 42.341698, binary training acc = 0.911512 at iter 10 epoch 3[0m
[31m2018-09-16 03:15:32,009 INFO - LoggingTest - discriminator loss = 0.144635, generator loss = 42.341698, binary training acc = 0.911512 at iter 10 epoch 3[0m
[31m2018-09-16 03:15:35,250:304:INFO:speed: 92.8777490293 samples/s[0m
[31m2018-09-16 03:15:35,250 INFO - LoggingTest - speed: 92.8777490293 samples/s[0m
[31m2018-09-16 03:15:35,458:307:INFO:discriminator loss = 0

[31m2018-09-16 03:16:12,552:304:INFO:speed: 92.7216410806 samples/s[0m
[31m2018-09-16 03:16:12,552 INFO - LoggingTest - speed: 92.7216410806 samples/s[0m
[31m2018-09-16 03:16:12,761:307:INFO:discriminator loss = 0.371945, generator loss = 29.732668, binary training acc = 0.776476 at iter 0 epoch 7[0m
[31m2018-09-16 03:16:12,761 INFO - LoggingTest - discriminator loss = 0.371945, generator loss = 29.732668, binary training acc = 0.776476 at iter 0 epoch 7[0m
[31m2018-09-16 03:16:16,015:304:INFO:speed: 92.9408414813 samples/s[0m
[31m2018-09-16 03:16:16,015 INFO - LoggingTest - speed: 92.9408414813 samples/s[0m
[31m2018-09-16 03:16:16,225:307:INFO:discriminator loss = 0.135120, generator loss = 30.799549, binary training acc = 0.923194 at iter 10 epoch 7[0m
[31m2018-09-16 03:16:16,225 INFO - LoggingTest - discriminator loss = 0.135120, generator loss = 30.799549, binary training acc = 0.923194 at iter 10 epoch 7[0m
[31m2018-09-16 03:16:19,477:304:INFO:speed: 94.0866618578