In [1]:
import pandas as pd
import numpy as np
import os
import sagemaker
import json

from sagemaker.tensorflow import TensorFlow

In [2]:
# SageMaker session object; used just below to look up the default S3 bucket.
sagemaker_session = sagemaker.Session()
# Execution role of the current environment; passed to the estimator as `role`.
role = sagemaker.get_execution_role()
# Default S3 bucket of the session.
# NOTE(review): in the visible notebook `bucket` is only referenced by the
# commented-out clean-up cell at the end — confirm whether it is still needed.
bucket = sagemaker_session.default_bucket()

## Load constants and directories

In [3]:
# Load the JSON artefacts stored under helpers/objects/.
# Each file is opened in a `with` block so its handle is closed as soon as the
# content is parsed, instead of staying open until the garbage collector
# happens to reclaim it.

# Model dimensions (the cell output below shows keys N_WORDS, N_TAGS, MAX_LEN).
with open("helpers/objects/constants_dict.json", "r") as constants_file:
    constants = json.load(constants_file)

# presumably a tag -> integer index mapping; verify against the helper that wrote it
with open("helpers/objects/tag2idx.json", "r") as tag2idx_file:
    tag2idx = json.load(tag2idx_file)

# S3 URIs of the train / test CSV files (keys: train_data_directory, test_data_directory).
with open("helpers/objects/data_directories.json", "r") as directories_file:
    data_directories = json.load(directories_file)

In [4]:
# Echo what was loaded from disk: a plain-text heading followed by the rich
# display of the corresponding dictionary, constants first, then directories.
for heading, loaded in (('Constants:', constants), ('Directories:', data_directories)):
    print(heading)
    display(loaded)

Constants:


{'N_WORDS': 35178, 'N_TAGS': 17, 'MAX_LEN': 50}

Directories:


{'train_data_directory': 's3://sagemaker-eu-west-1-087816224558/named_entity_recognition/bilstm_data/bilstm_train.csv',
 'test_data_directory': 's3://sagemaker-eu-west-1-087816224558/named_entity_recognition/bilstm_data/bilstm_test.csv'}

## Model using GPU instance

In [None]:
# Print models/train_bilstm.py (the script used as the estimator's entry point)
# through the `pygmentize` command-line tool so it can be read inline.
! pygmentize models/train_bilstm.py

### Define hyperparameters

In [5]:
# Hyperparameters handed to the training script.
# Optimisation settings chosen in this notebook come first ...
hyperparameters = {'epochs': 5, 'batch-size': 64}
# ... followed by the model dimensions taken from the loaded constants ...
hyperparameters.update(
    (argument, constants[constant_key])
    for argument, constant_key in (
        ('max-len', 'MAX_LEN'),
        ('n-tags', 'N_TAGS'),
        ('n-words', 'N_WORDS'),
    )
)
# ... and finally the model version (kept as a string).
hyperparameters['model-version'] = '1'

### Declare estimator and train the model

In [6]:
# Script-mode TensorFlow estimator that runs models/train_bilstm.py on a
# single ml.p2.xlarge instance.
tf_estimator = TensorFlow(
    # Training code and its arguments.
    source_dir="models",
    entry_point='train_bilstm.py',
    hyperparameters=hyperparameters,
    # Framework container selection.
    framework_version='2.1.0',
    py_version='py3',
    script_mode=True,
    # Permissions and hardware.
    role=role,
    train_instance_type='ml.p2.xlarge',
    train_instance_count=1,
)

In [7]:
# Start the training job. The single 'training' channel points at the training
# CSV on S3; the job log below shows it mounted at /opt/ml/input/data/training.
tf_estimator.fit(
    inputs={'training': data_directories['train_data_directory']},
)

2020-05-29 13:17:20 Starting - Starting the training job...
2020-05-29 13:17:22 Starting - Launching requested ML instances......
2020-05-29 13:18:26 Starting - Preparing the instances for training......
2020-05-29 13:19:29 Downloading - Downloading input data...
2020-05-29 13:20:08 Training - Downloading the training image.....[34m2020-05-29 13:21:03,221 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2020-05-29 13:21:03,667 sagemaker-containers INFO     Invoking user script
[0m
[34mTraining Env:
[0m
[34m{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "training": "/opt/ml/input/data/training"
    },
    "current_host": "algo-1",
    "framework_module": "sagemaker_tensorflow_container.training:main",
    "hosts": [
        "algo-1"
    ],
    "hyperparameters": {
        "batch-size": 64,
        "max-len": 50,
        "n-words": 35178,
        "model_dir": "s3://sagemaker-eu-west-1-087816224558/ten

In [None]:
# tf_estimator.fit({'training': data_directories['train_data_directory'], 'validation': 's3://sagemaker-eu-west-1-087816224558/named_entity_recognition/bilstm_data/bilstm_train_y.npy'})

## Deploy model

In [8]:
# Deploy the trained model to an endpoint backed by one ml.m4.xlarge instance
# and keep the returned predictor for later use / clean-up.
tf_predictor = tf_estimator.deploy(
    instance_type='ml.m4.xlarge',
    initial_instance_count=1,
)

-----------!

## Clean up

In [9]:
# Delete the endpoint created by the deploy cell so the hosting instance is
# not left running once the notebook is finished.
tf_predictor.delete_endpoint()

In [None]:
# import boto3
# bucket_to_delete = boto3.resource('s3').Bucket(bucket)
# bucket_to_delete.objects.all().delete()