In [1]:
import pandas as pd
import numpy as np
import os
import sagemaker
import json

from sagemaker.tensorflow import TensorFlow

In [2]:
# Create the SageMaker session, then look up the execution role and the
# session's default bucket; later cells reference `role` (and `bucket`).
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = sagemaker_session.default_bucket()

# Echo both values so the reader can see what this run is using.
summary_lines = (f'Bucket: {bucket}', f'Role: {role}')
for summary_line in summary_lines:
    print(summary_line)

Bucket: sagemaker-eu-west-1-087816224558
Role: arn:aws:iam::087816224558:role/service-role/AmazonSageMaker-ExecutionRole-20200424T125478


## Load constants and directories

In [3]:
def _load_json(path):
    """Parse the JSON file at ``path`` and return the decoded object.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes, instead of being left open for the life of the kernel.
    """
    with open(path, "r") as json_file:
        return json.load(json_file)


# Settings produced outside this notebook and stored under helpers/objects/.
constants = _load_json("helpers/objects/constants_dict.json")
tag2idx = _load_json("helpers/objects/tag2idx.json")
data_directories = _load_json("helpers/objects/data_directories.json")

In [4]:
# Show each loaded object under a plain-text heading, using the notebook's
# rich display for the object itself.
sections = (
    ('Constants:', constants),
    ('Directories:', data_directories),
)
for heading, payload in sections:
    print(heading)
    display(payload)

Constants:


{'N_WORDS': 35178, 'N_TAGS': 17, 'MAX_LEN': 50}

Directories:


{'train_data_directory': 's3://sagemaker-eu-west-1-087816224558/named_entity_recognition/bilstm_data/bilstm_train.csv',
 'test_data_directory': 's3://sagemaker-eu-west-1-087816224558/named_entity_recognition/bilstm_data/bilstm_test.csv'}

## Model

In [6]:
# Shell escape: print the training script with syntax highlighting so the
# notebook shows the code that the estimator below uses as its entry point.
! pygmentize models/train_bilstm.py

[34mimport[39;49;00m [04m[36margparse[39;49;00m, [04m[36mos[39;49;00m
[34mimport[39;49;00m [04m[36mnumpy[39;49;00m [34mas[39;49;00m [04m[36mnp[39;49;00m
[34mimport[39;49;00m [04m[36mpandas[39;49;00m [34mas[39;49;00m [04m[36mpd[39;49;00m
[34mimport[39;49;00m [04m[36mjson[39;49;00m
[34mimport[39;49;00m [04m[36msubprocess[39;49;00m
[34mimport[39;49;00m [04m[36msys[39;49;00m

[34mfrom[39;49;00m [04m[36mtensorflow.keras[39;49;00m [34mimport[39;49;00m Model, Sequential
[34mfrom[39;49;00m [04m[36mtensorflow.keras.layers[39;49;00m [34mimport[39;49;00m (
    Embedding,
    SpatialDropout1D,
    Bidirectional,
    LSTM,
    TimeDistributed,
    Dense
)
[34mfrom[39;49;00m [04m[36mtensorflow.keras.callbacks[39;49;00m [34mimport[39;49;00m EarlyStopping, Callback


[34mclass[39;49;00m [04m[32mBiLSTM[39;49;00m(Model):

    [34mdef[39;49;00m [32m__init__[39;49;00m([36mself[39;49;00m):
        [36msuper[39;49;00m(BiLSTM, [36m

### Define hyperparameters

In [14]:
# Hand-picked training settings.
training_settings = {
    'epochs': 20,
    'batch-size': 32,
    'embed-dim': 64,
    'hidden-units': 128,
}

# Settings copied from the loaded `constants` dict.
data_settings = {
    'max-len': constants['MAX_LEN'],
    'n-tags': constants['N_TAGS'],
    'n-words': constants['N_WORDS'],
}

# Merge in the original key order; the estimator receives this single dict.
hyperparameters = {**training_settings, **data_settings, 'model-version': '1'}

### Declare estimator and train the model

In [15]:
# Gather the estimator configuration first so it can be read at a glance.
# NOTE(review): `train_instance_count`, `train_instance_type` and `script_mode`
# look like SageMaker Python SDK v1 argument names — confirm the installed SDK
# version before re-running, since newer releases renamed or removed them.
estimator_settings = dict(
    entry_point='train_bilstm.py',
    source_dir="models",
    role=role,
    train_instance_count=1,
    train_instance_type='ml.p2.xlarge',
    framework_version='2.1.0',
    py_version='py3',
    script_mode=True,
    hyperparameters=hyperparameters,
)
tf_estimator = TensorFlow(**estimator_settings)

In [16]:
# Launch training; the 'training' channel is fed from the location stored
# under 'train_data_directory'.
training_channels = {'training': data_directories['train_data_directory']}
tf_estimator.fit(training_channels)

2020-05-29 19:35:10 Starting - Starting the training job...
2020-05-29 19:35:13 Starting - Launching requested ML instances......
2020-05-29 19:36:19 Starting - Preparing the instances for training......
2020-05-29 19:37:36 Downloading - Downloading input data......
2020-05-29 19:38:24 Training - Downloading the training image...
2020-05-29 19:39:07 Training - Training image download completed. Training in progress...[34m2020-05-29 19:39:12,214 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2020-05-29 19:39:12,712 sagemaker-containers INFO     Invoking user script
[0m
[34mTraining Env:
[0m
[34m{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "training": "/opt/ml/input/data/training"
    },
    "current_host": "algo-1",
    "framework_module": "sagemaker_tensorflow_container.training:main",
    "hosts": [
        "algo-1"
    ],
    "hyperparameters": {
        "embed-dim": 64,
        "batch-size": 

## Deploy model

In [17]:
# Deploy the trained estimator behind a named endpoint and keep the returned
# predictor for the cells below.
deployment_settings = {
    'initial_instance_count': 1,
    'instance_type': 'ml.m4.xlarge',
    'endpoint_name': 'ner-bartek-111',
}
tf_predictor = tf_estimator.deploy(**deployment_settings)

-------------!

In [20]:
# Show the endpoint the predictor refers to.
# NOTE(review): the output saved with this cell does not match the
# endpoint_name passed to deploy(), and the execution count skips from 17 to
# 20 — presumably the output is left over from an earlier run; re-run to confirm.
tf_predictor.endpoint

'tensorflow-training-2020-05-29-19-35-10-455'

## Clean up

In [None]:
# Delete the endpoint created by the deploy step once it is no longer needed
# (presumably to stop paying for the hosting instance — confirm for your account).
tf_predictor.delete_endpoint()

In [None]:
# Optional and destructive: if uncommented, the lines below delete every
# object in `bucket`. Presumably left disabled on purpose.
# NOTE(review): `bucket` is the session's default bucket, so confirm nothing
# else is stored there before enabling this.
# import boto3
# bucket_to_delete = boto3.resource('s3').Bucket(bucket)
# bucket_to_delete.objects.all().delete()