In [1]:
import pandas as pd
import numpy as np
import os
import sagemaker
import json

from sagemaker.tensorflow import TensorFlow

In [2]:
# Session object through which this notebook talks to SageMaker.
sagemaker_session = sagemaker.Session()

# Default S3 bucket of that session (only referenced by the clean-up cell).
bucket = sagemaker_session.default_bucket()

# Execution role of the notebook; passed to the estimator below.
role = sagemaker.get_execution_role()

## Load constants and directories

In [3]:
def _load_json(path):
    """Parse the JSON file at ``path`` and return the resulting object.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes, instead of being left open until garbage collection.
    """
    with open(path, "r") as handle:
        return json.load(handle)


# Model dimensions (N_WORDS, N_TAGS, MAX_LEN), used below as hyperparameters.
constants = _load_json("helpers/objects/constants_dict.json")
# Presumably the tag -> integer index mapping; not used further in this notebook.
tag2idx = _load_json("helpers/objects/tag2idx.json")
# Locations of the train / test data sets.
data_directories = _load_json("helpers/objects/data_directories.json")

In [4]:
# Show each loaded dictionary under a plain-text heading.
for heading, loaded in (('Constants:', constants), ('Directories:', data_directories)):
    print(heading)
    display(loaded)

Constants:


{'N_WORDS': 35178, 'N_TAGS': 17, 'MAX_LEN': 50}

Directories:


{'train_data_directory': 's3://sagemaker-eu-west-1-087816224558/named_entity_recognition/bilstm_data/bilstm_train.csv',
 'test_data_directory': 's3://sagemaker-eu-west-1-087816224558/named_entity_recognition/bilstm_data/bilstm_test.csv'}

## Model using GPU instance

In [3]:
# Print the training script with syntax highlighting, for reference.
! pygmentize models/train_bilstm.py

[34mimport[39;49;00m [04m[36margparse[39;49;00m, [04m[36mos[39;49;00m
[34mimport[39;49;00m [04m[36mnumpy[39;49;00m [34mas[39;49;00m [04m[36mnp[39;49;00m
[34mimport[39;49;00m [04m[36mpandas[39;49;00m [34mas[39;49;00m [04m[36mpd[39;49;00m
[34mimport[39;49;00m [04m[36mjson[39;49;00m

[34mfrom[39;49;00m [04m[36mtensorflow.keras[39;49;00m [34mimport[39;49;00m Model
[34mfrom[39;49;00m [04m[36mtensorflow.keras.layers[39;49;00m [34mimport[39;49;00m (
    Embedding,
    SpatialDropout1D,
    Bidirectional,
    LSTM,
    TimeDistributed,
    Dense
)

[34mclass[39;49;00m [04m[32mBiLSTM[39;49;00m(Model):

    [34mdef[39;49;00m [32m__init__[39;49;00m([36mself[39;49;00m):
        [36msuper[39;49;00m(BiLSTM, [36mself[39;49;00m).[32m__init__[39;49;00m()
        [36mself[39;49;00m.embedding = Embedding(input_dim=N_WORDS, output_dim=[34m50[39;49;00m, input_length=MAX_LEN)
        [36mself[39;49;00m.spatial_dropout = SpatialDropout1D(

### Define hyperparameters

In [25]:
# Hyperparameters handed to the estimator (and from there to the training script).
hyperparameters = {
    # Fixed training settings.
    "epochs": 5,
    "batch-size": 64,
    # Dimensions taken from the constants loaded earlier in the notebook.
    "max-len": constants["MAX_LEN"],
    "n-tags": constants["N_TAGS"],
    "n-words": constants["N_WORDS"],
    # Passed as a string, not an integer.
    "model-version": "1",
}

### Declare estimator and train the model

In [26]:
# Estimator that runs models/train_bilstm.py on a single ml.p2.xlarge instance
# with TensorFlow 2.1.0 / Python 3.
# NOTE(review): train_instance_count / train_instance_type / script_mode look
# like SageMaker Python SDK v1 argument names -- confirm the installed SDK
# version before upgrading it.
tf_estimator = TensorFlow(
    entry_point="train_bilstm.py",
    source_dir="models",
    role=role,
    train_instance_count=1,
    train_instance_type="ml.p2.xlarge",
    framework_version="2.1.0",
    py_version="py3",
    script_mode=True,
    hyperparameters=hyperparameters,
)

In [27]:
# Point the 'training' channel at the training data location and start the job.
training_inputs = {'training': data_directories['train_data_directory']}
tf_estimator.fit(training_inputs)

2020-05-28 12:31:07 Starting - Starting the training job...
2020-05-28 12:31:10 Starting - Launching requested ML instances.........
2020-05-28 12:32:43 Starting - Preparing the instances for training......
2020-05-28 12:33:59 Downloading - Downloading input data...
2020-05-28 12:34:29 Training - Downloading the training image......
2020-05-28 12:35:35 Training - Training image download completed. Training in progress..[34m2020-05-28 12:35:39,962 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2020-05-28 12:35:40,456 sagemaker-containers INFO     Invoking user script
[0m
[34mTraining Env:
[0m
[34m{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "training": "/opt/ml/input/data/training"
    },
    "current_host": "algo-1",
    "framework_module": "sagemaker_tensorflow_container.training:main",
    "hosts": [
        "algo-1"
    ],
    "hyperparameters": {
        "batch-size": 64,
        "max-len": 

## Deploy model

In [28]:
# Deploy the trained model to an endpoint backed by one ml.m4.xlarge instance.
tf_predictor = tf_estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
)

-----------!

## Clean up

In [None]:
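# Left commented out so "Run All" does not remove the endpoint by accident.
# Uncomment and run once the endpoint is no longer needed.
# tf_predictor.delete_endpoint()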

In [None]:
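# WARNING: the lines below delete EVERY object in the session's default bucket
# (`bucket`), not only the files from this notebook. Uncomment only if the
# bucket holds nothing else you need.
# import boto3
# bucket_to_delete = boto3.resource('s3').Bucket(bucket)
# bucket_to_delete.objects.all().delete()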