# AWS Basic setup

Enable access to Amazon services like S3:
- get_execution_role: allows Amazon SageMaker to assume the role created during instance creation and access resources on your behalf.

In [1]:
import boto3
from sagemaker import get_execution_role

# IAM role that Amazon SageMaker assumes to access resources (such as S3)
# on your behalf.
role = get_execution_role()

#### Define a bucket
This bucket will host the dataset that will be used.

In [2]:
# Bucket name followed by the folder path inside that bucket. upload_to_s3
# accepts this "bucket/folder/..." form for its `bucket` argument and splits
# it on "/" into the bucket name and the leading folders of the object key.
bucket = 'dataen-sagemaker-dev/datasets/cal-tech'

#### Define the containers
The training job defined in this notebook runs in a container (a Docker container). Each supported region has its own container image, and the job will use the one for your region.

In [3]:
# Region name -> ECR URI of the image-classification container image.
containers = {
    'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/image-classification:latest',
    'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/image-classification:latest',
    'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/image-classification:latest',
    'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/image-classification:latest',
}

# Pick the image for the region of the current boto3 session; a region that
# is not listed above raises KeyError.
training_image = containers[
    boto3.Session().region_name
]

#### Import dataset

In [4]:
import os
import re
import urllib.request
import boto3


def download(url):
    """Download ``url`` into the current directory unless it is already there.

    The local file name is the last "/"-separated component of ``url``. When
    a file with that name already exists, nothing is downloaded.

    The data is first written to ``<filename>.part`` and only moved to its
    final name once the transfer has finished. An interrupted or failed
    download therefore never leaves a truncated file behind that a later
    call would mistake for a complete one.

    Args:
        url: Address of the file to fetch.
    """
    filename = url.split("/")[-1]
    if os.path.exists(filename):
        return

    partial = filename + ".part"
    try:
        urllib.request.urlretrieve(url, partial)
        # os.replace moves the finished download into place in one step.
        os.replace(partial, filename)
    finally:
        # After a successful move the .part file is gone; it only still
        # exists here when the download or the move failed.
        if os.path.exists(partial):
            os.remove(partial)


def upload_to_s3(bucket, file, channel):
    """
    Store a local file in the 'channel' folder within the specified bucket.

    If ``bucket`` contains folders in its path (e.g. "my-bucket/a/b"), the
    first component is used as the bucket name and the remaining components
    become the leading folders of the object key, giving
    "a/b/<channel>/<file>". Without folders the key is "<channel>/<file>".

    Args:
        bucket: Bucket name, a "/"-separated "bucket/folder/..." string, or
            a list/tuple of the form ``[bucket, folder, ...]``.
        file: Path of the local file to upload. The same string is used as
            the last part of the object key.
        channel: Folder name placed directly before ``file`` in the key
            (e.g. "train" or "validation").

    Raises:
        ValueError: If ``bucket`` does not contain a bucket name.
    """
    if isinstance(bucket, str):
        parts = bucket.split("/")
    else:
        parts = list(bucket)

    # Drop empty components so that leading, trailing or doubled slashes
    # (e.g. "my-bucket/data/") cannot yield an empty bucket name or an
    # accidental "//" inside the object key.
    parts = [part for part in parts if part]
    if not parts:
        raise ValueError("bucket must contain a bucket name")

    bucket_name, folders = parts[0], parts[1:]
    key = "/".join([*folders, channel, file])

    # Open the file before touching S3 so a missing local file fails fast.
    with open(file, "rb") as data:
        s3 = boto3.resource('s3')
        s3.Bucket(bucket_name).put_object(Key=key, Body=data)

In [None]:
# caltech-256: download the training and validation .rec files and upload
# each of them to its own channel folder in S3.
# upload_to_s3 takes its arguments in the order (bucket, file, channel).
download('http://data.mxnet.io/data/caltech-256/caltech-256-60-train.rec')
upload_to_s3(bucket, 'caltech-256-60-train.rec', 'train')
download('http://data.mxnet.io/data/caltech-256/caltech-256-60-val.rec')
upload_to_s3(bucket, 'caltech-256-60-val.rec', 'validation')