# Master Lab Setup

## Setup & Admin
This notebook is only used for setup and configuration of the lab environments. If you're not running the lab, you shouldn't execute any of the code below!

#### Settings

In [11]:
# user info
N_USERS = 1  # number of lab participants to provision

# AWS account and s3 bucket
# MODIFY  TO BE SPECIFIC FOR YOUR LAB
REGION = 'ap-southeast-2'
ACCOUNT = '013615763154'
BUCKET = 'grr.amazon.com-lab'

# security
GROUP = 'labgroup'
# NOTE(review): AdministratorAccess gives every member of GROUP full control
# of the account -- confirm this is intended for lab participants.
POLICY = 'arn:aws:iam::aws:policy/AdministratorAccess'
# Derived from ACCOUNT so that changing the account id above cannot leave a
# stale role ARN behind.
NOTEBOOK_SERVICE_ROLE = 'arn:aws:iam::{}:role/NotebookServiceRole'.format(ACCOUNT)

# notebook config
VOLUME_SIZE = 10  # passed as VolumeSizeInGB when notebooks are created
INSTANCE_TYPE = 'ml.t3.large'
# Available Instance Types may vary by region:
# 'ml.t2.medium'|'ml.t2.large'|'ml.t2.xlarge'|'ml.t2.2xlarge'|
# 'ml.t3.medium'|'ml.t3.large'|'ml.t3.xlarge'|'ml.t3.2xlarge'|
# 'ml.m4.xlarge'|'ml.m4.2xlarge'|'ml.m4.4xlarge'|'ml.m4.10xlarge'|'ml.m4.16xlarge'|
# 'ml.m5.xlarge'|'ml.m5.2xlarge'|'ml.m5.4xlarge'|'ml.m5.12xlarge'|'ml.m5.24xlarge'|
# 'ml.c4.xlarge'|'ml.c4.2xlarge'|'ml.c4.4xlarge'|'ml.c4.8xlarge'|
# 'ml.c5.xlarge'|'ml.c5.2xlarge'|'ml.c5.4xlarge'|'ml.c5.9xlarge'|'ml.c5.18xlarge'|
# 'ml.c5d.xlarge'|'ml.c5d.2xlarge'|'ml.c5d.4xlarge'|'ml.c5d.9xlarge'|'ml.c5d.18xlarge'|
# 'ml.p2.xlarge'|'ml.p2.8xlarge'|'ml.p2.16xlarge'|
# 'ml.p3.2xlarge'|'ml.p3.8xlarge'|'ml.p3.16xlarge'

# Labs to prepare; each setup cell below checks for its own name in this list.
LABS_REQUIRED = ['deepar', 'groundtruth']

# GROUND TRUTH & IMAGES
#CODE_REPO = 'https://github.com/edenduthie/auto-labelling-model-tuning-amazon-sagemaker'
#IMPORT_BUCKET = 'grr.amazon.com-public-share'
#IMPORT_PATH = 'lab_deepar_data.zip'
MAX_IMAGES = 1000  # upper bound on the number of images selected for the groundtruth lab

print('configured settings')

configured settings


#### Imports and Config

In [12]:
import sys, boto3, random, string, time, json
from zipfile import ZipFile
from collections import defaultdict
from queue import Queue
from threading import Thread

# Shared AWS handles used by the cells below.
s3 = boto3.resource('s3')  # resource API: Bucket / Object helpers
sgm = boto3.client('sagemaker', region_name=REGION)  # SageMaker client for the lab region
iam = boto3.client('iam')

In [3]:
# generate usernames and password
# Word pools that are combined into usernames, plus the accumulators that
# the generation loop fills in.
count = 0
colors = [
    'yellow', 'orange', 'red', 'green', 'blue',
    'purple', 'silver', 'magenta', 'cyan', 'gray',
]
things = [
    'fish', 'bird', 'bike', 'flower', 'house',
]
usernames, passwords = [], []

def randomPassword(stringLength=8):
    """Return a random password made of lowercase ASCII letters.

    Characters are drawn with ``random.SystemRandom``, which reads from the
    operating system's entropy source. The module-level ``random`` functions
    use a deterministic generator that is not suitable for credentials.

    Args:
        stringLength: number of characters in the password (default 8).

    Returns:
        A string of ``stringLength`` characters from ``string.ascii_lowercase``.
    """
    rng = random.SystemRandom()
    return ''.join(rng.choice(string.ascii_lowercase) for _ in range(stringLength))

# Pair every color with every thing until N_USERS accounts have been named.
for color in colors:
    if count >= N_USERS:
        break
    for thing in things:
        if count >= N_USERS:
            break
        usernames.append(color + thing)
        passwords.append(randomPassword())
        count += 1

# The name pool is finite (len(colors) * len(things)); report a shortfall
# instead of silently provisioning fewer users than requested.
if count < N_USERS:
    print('only {} of {} requested usernames could be generated'.format(count, N_USERS))

#### Set up individual labs
Copy an archive of data from a public S3 bucket onto this notebook's server, then upload its contents to the lab's shared S3 bucket in uncompressed format.

In [13]:
if 'deepar' in LABS_REQUIRED:
    # CODE_REPO is later used as the default code repository of every
    # notebook instance; IMPORT_* locate the public archive with the lab data.
    CODE_REPO = 'https://github.com/glyfnet/lab-sagemaker-deepar.git'
    IMPORT_BUCKET = 'grr.amazon.com-public-share'
    IMPORT_PATH = 'lab_deepar_data.zip'

    # Best effort: creation fails when, for example, the bucket already
    # exists. The reason is printed so real problems are not hidden.
    try:
        if REGION == 'us-east-1':
            # us-east-1 is the S3 default location and must not be sent as
            # an explicit LocationConstraint.
            s3.Bucket(BUCKET).create()
        else:
            s3.Bucket(BUCKET).create(CreateBucketConfiguration={'LocationConstraint': REGION})
        print('created bucket {}'.format(BUCKET))
    except Exception as err:
        print('unable to create bucket {}: {}'.format(BUCKET, err))

    print('downloading s3://{}/{}'.format(IMPORT_BUCKET, IMPORT_PATH))
    s3.Object(IMPORT_BUCKET, IMPORT_PATH).download_file(IMPORT_PATH)

    # Upload every regular archive member, uncompressed, under labs/deepar/.
    with ZipFile(IMPORT_PATH, mode='r') as zip_ref:
        for info in zip_ref.infolist():
            # Skip directories and any entry whose path contains '__'
            # (presumably archive metadata folders -- confirm).
            if info.is_dir() or '__' in info.filename:
                continue
            key = 'labs/deepar/' + info.filename
            with zip_ref.open(info) as file_ref:
                s3.Object(BUCKET, key).upload_fileobj(file_ref)
            print('wrote s3://{}/{}'.format(BUCKET, key))

unable to create bucket grr.amazon.com-lab
downloading s3://grr.amazon.com-public-share/lab_deepar_data.zip
wrote s3://grr.amazon.com-lab/labs/deepar/data/audusd_1m.csv
wrote s3://grr.amazon.com-lab/labs/deepar/data/audusd_1m_partial.csv
wrote s3://grr.amazon.com-lab/labs/deepar/data/eurusd_1m.csv
wrote s3://grr.amazon.com-lab/labs/deepar/data/usdjpy_1m.csv


In [14]:
if 'groundtruth' in LABS_REQUIRED:
    # Download and process the Open Images annotations.
    !wget https://storage.googleapis.com/openimages/2018_04/test/test-annotations-bbox.csv
    !wget https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json
    
    with open('bbox_labels_600_hierarchy.json', 'r') as f:
        hierarchy = json.load(f)
    
    CLASS_NAME = 'Bird'
    CLASS_ID = '/m/015p6'

    # Find all the subclasses of the desired image class (e.g. 'swans' and 'pigeons' etc if CLASS_NAME=='Bird').
    good_subclasses = set()

    def get_all_subclasses(hierarchy, good_subtree=False):
        """Collect the label of CLASS_ID and of every node beneath it.

        Walks the label tree depth-first. A node is recorded in the shared
        ``good_subclasses`` set when it is CLASS_ID itself or lies inside a
        subtree already marked as matching via ``good_subtree``.

        Returns the shared ``good_subclasses`` set.
        """
        label = hierarchy['LabelName']
        inside = good_subtree
        if label == CLASS_ID:
            inside = True
        if inside:
            good_subclasses.add(label)
        if 'Subcategory' in hierarchy:
            for child in hierarchy['Subcategory']:
                get_all_subclasses(child, good_subtree=inside)
        return good_subclasses

    good_subclasses = get_all_subclasses(hierarchy)

    # Find an appropriate number of images with at least one bounding box in the desired category
    # Maps image id -> list of [CLASS_NAME, xmin, xmax, ymin, ymax] boxes.
    fids2bbs = defaultdict(list)
    # Skip images with risky content.
    skip_these_images = ['251d4c429f6f9c39',
                        '065ad49f98157c8d']

    with open('test-annotations-bbox.csv', 'r') as f:
        # Stream the file instead of loading it all with readlines(): the
        # loop normally stops long before the end of the file.
        next(f, None)  # skip the header row
        for row in f:
            img_id, _, cls_id, conf, xmin, xmax, ymin, ymax, *_ = row.strip().split(',')
            if img_id in skip_these_images:
                continue
            if cls_id in good_subclasses:
                fids2bbs[img_id].append([CLASS_NAME, xmin, xmax, ymin, ymax])
                # Stop as soon as MAX_IMAGES distinct images have been seen.
                if len(fids2bbs) == MAX_IMAGES:
                    break

    class CopyWorker(Thread):
        """Worker thread that performs S3 copies taken from a shared queue.

        Each queue item is a ``(srckey, destkey)`` tuple: the object
        ``srckey`` in the source bucket is copied to ``destkey`` in the
        destination bucket.
        """

        def __init__(self, queue, src_bucket_name, dst_bucket_name):
            """Remember the work queue and the source / destination buckets."""
            super(CopyWorker, self).__init__()
            self._queue = queue
            self._src_bucket_name = src_bucket_name
            self._dst_bucket = s3.Bucket(dst_bucket_name)

        def run(self):
            """Process queue items forever; meant to be run as a daemon thread."""
            while True:
                srckey, destkey = self._queue.get()
                try:
                    self._dst_bucket.copy(
                        CopySource={'Bucket': self._src_bucket_name, 'Key': srckey},
                        Key=destkey,
                    )
                except Exception as err:
                    # Report the failure and keep the worker alive for the
                    # remaining items.
                    sys.stdout.write('\nunable to copy {}: {}\n'.format(srckey, err))
                finally:
                    # Always acknowledge the item: a missing task_done() would
                    # make Queue.join() block forever.
                    self._queue.task_done()
                sys.stdout.write('Copying {}/{}     \r'.format(MAX_IMAGES-self._queue.qsize(), MAX_IMAGES))
                sys.stdout.flush()

    # create a thread queue and start processing
    copy_queue = Queue(maxsize=1000)
    for _ in range(20):
        # Daemon threads, so the endless worker loops never block shutdown.
        copier = CopyWorker(copy_queue, 'open-images-dataset', BUCKET)
        copier.daemon = True
        copier.start()

    # Copy the images to our local bucket.
    for img_id in fids2bbs.keys():
        source_key = 'test/{}.jpg'.format(img_id)
        target_key = '{}/images/{}.jpg'.format('labs/groundtruth', img_id)
        copy_queue.put((source_key, target_key))

    # Block until every queued copy has been acknowledged by a worker.
    copy_queue.join()

    print('\nDone!')

--2019-05-18 03:40:31--  https://storage.googleapis.com/openimages/2018_04/test/test-annotations-bbox.csv
Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.167.80, 2404:6800:4006:805::2010
Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.167.80|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 52174204 (50M) [text/csv]
Saving to: ‘test-annotations-bbox.csv.32’


2019-05-18 03:40:34 (23.2 MB/s) - ‘test-annotations-bbox.csv.32’ saved [52174204/52174204]

--2019-05-18 03:40:34--  https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json
Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.167.80, 2404:6800:4006:806::2010
Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.167.80|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 86291 (84K) [text/csv]
Saving to: ‘bbox_labels_600_hierarchy.json.32’


2019-05-18 03:40:35 (28.8 MB/s) - ‘bbox_labels_6

In [16]:
    # Create and upload the full input manifest.
    def create_manifest(name, size=None):
        """Write a JSON-lines manifest of the selected images and upload it.

        One ``{"source-ref": "s3://..."}`` line is written per image id in
        ``fids2bbs``. The file is then uploaded to
        ``labs/groundtruth/<name>`` in BUCKET.

        Args:
            name: local file name, also used as the last part of the S3 key.
            size: stop after this many entries; ``None`` writes every image.
        """
        with open(name, 'w') as f:
            for written, img_id in enumerate(fids2bbs, start=1):
                img_path = 's3://{}/{}/images/{}.jpg'.format(BUCKET, 'labs/groundtruth', img_id)
                # json.dumps takes care of quoting and escaping.
                f.write(json.dumps({'source-ref': img_path}) + '\n')
                if size is not None and written == size:
                    break

        s3.Bucket(BUCKET).upload_file(name, 'labs/groundtruth' + '/' + name)

    # fids2bbs is only built when the groundtruth lab is being prepared.
    if 'groundtruth' in LABS_REQUIRED:
        create_manifest('input.manifest')
        create_manifest('test.input.manifest', 10)

In [None]:
# create group GROUP and attach the configured POLICY
# NOTE(review): POLICY is set to AdministratorAccess in the settings cell,
# which is full access, not view-only console access.
try:
    iam.create_group(GroupName=GROUP)
    iam.attach_group_policy(GroupName=GROUP, PolicyArn=POLICY)
    print('created group {} with policy {}'.format(GROUP, POLICY))
except Exception as err:
    # Best effort (e.g. the group may already exist), but show the reason.
    print('unable to create group {}: {}'.format(GROUP, err))

In [None]:
# create users and add to lab group
count = 0
for username, password in zip(usernames, passwords):
    try:
        iam.create_user(UserName=username)
        iam.add_user_to_group(GroupName=GROUP, UserName=username)
        iam.create_login_profile(UserName=username, Password=password)
        print('created user '+username)
        count += 1
    except Exception as err:
        # Carry on with the remaining users, but report why this one failed.
        # Catching Exception rather than everything keeps the loop interruptible.
        print('unable to create user {}: {}'.format(username, err))
print('created {} users'.format(count))

#### Create Notebooks

In [None]:
# create notebooks
# One notebook instance per user, named after the user.
# CODE_REPO is only defined by the deepar setup cell; if that cell was
# skipped, the resulting NameError is reported below for every user.
count = 0
for username in usernames:
    try:
        sgm.create_notebook_instance(
            NotebookInstanceName=username,
            InstanceType=INSTANCE_TYPE,
            RoleArn=NOTEBOOK_SERVICE_ROLE,
            DirectInternetAccess='Enabled',
            VolumeSizeInGB=VOLUME_SIZE,
            DefaultCodeRepository=CODE_REPO,
        )
        print('created notebook '+username)
        count += 1
    except Exception as err:
        # Carry on with the remaining notebooks, but report the cause.
        print('unable to create notebook {}: {}'.format(username, err))
print('created {} notebooks'.format(count))

In [None]:
# create advice print out sheet
# NOTE: the output contains plaintext passwords -- clear this cell's output
# before saving or sharing the notebook.
# LABID is not defined by any cell of this notebook, so referencing it
# directly raises NameError. Use it when it exists, otherwise fall back to
# the configured lab names.
# NOTE(review): confirm what value participants are expected to enter as labid.
lab_id = globals().get('LABID', ', '.join(LABS_REQUIRED))
for username, password in zip(usernames, passwords):
    print('Login to console: https://console.aws.amazon.com ')
    print('account: {}   user: {}   password: {}\n'.format(ACCOUNT, username, password))
    print('Open your notebook: Go to Amazon SageMaker -> Notebook Instances\nOpen jupyter for {}\n'.format(username))
    print('Modify settings:')
    print('BUCKET: {}   labid: {}   user: {}\n\n\n'.format(BUCKET, lab_id, username))

## CLEANUP ONLY!!!

In [None]:
# stop notebooks
# Requests a stop for every participant's notebook instance.
for username in usernames:
    try:
        sgm.stop_notebook_instance(NotebookInstanceName=username)
        print('stopped notebook '+username)
    except Exception as err:
        # Carry on with the remaining notebooks, but report the cause.
        print('unable to stop notebook {}: {}'.format(username, err))

In [None]:
# delete all users & groups
# For each user: leave the group, drop the console login, then delete the user.
for username in usernames:
    try:
        iam.remove_user_from_group(GroupName=GROUP, UserName=username)
        iam.delete_login_profile(UserName=username)
        iam.delete_user(UserName=username)
        print('removed user '+username)
    except Exception as err:
        # Carry on with the remaining users, but report the cause.
        print('unable to remove user {}: {}'.format(username, err))

In [None]:
# delete the lab group: detach its policy first, then remove the group
try:
    iam.detach_group_policy(GroupName=GROUP, PolicyArn=POLICY)
    iam.delete_group(GroupName=GROUP)
    print('removed group '+GROUP)
except Exception as err:
    # Best effort, but show why the group could not be removed.
    print('unable to remove group {}: {}'.format(GROUP, err))

In [None]:
# delete notebooks
# NOTE(review): deletion presumably requires the instance to be fully
# stopped; if this runs too soon after the stop cell, the error printed
# below will say so -- confirm and re-run once the instances have stopped.
for username in usernames:
    try:
        sgm.delete_notebook_instance(NotebookInstanceName=username)
        print('deleted notebook '+username)
    except Exception as err:
        # Carry on with the remaining notebooks, but report the cause.
        print('unable to delete notebook {}: {}'.format(username, err))