### Bring Your Own Script on SageMaker 
* Prepare the script 
* Get the trained model 
* Run inference with TensorFlow 2.2.0 

In [None]:
import os
import sagemaker
import boto3 
from sagemaker import get_execution_role

# SageMaker session plus the low-level boto3 SageMaker client used later
# to describe the training job.
sagemaker_session = sagemaker.Session()
client = boto3.client('sagemaker')

# Execution role handed to the estimator, and the region of the session.
role = get_execution_role()
region = sagemaker_session.boto_session.region_name

In [None]:
from sagemaker.tensorflow import TensorFlow


# Script-mode TensorFlow estimator: runs tf22-byos/merge.py on a single
# ml.p3.2xlarge instance.
# NOTE(review): training pins framework 2.1.0 while the notebook intro and the
# later pip cell refer to TensorFlow 2.2.0 - confirm the mismatch is intended.
vgg16_estimator = TensorFlow(
    entry_point='tf22-byos/merge.py',
    framework_version='2.1.0',
    py_version='py3',
    role=role,
    train_instance_type='ml.p3.2xlarge',
    train_instance_count=1,
)


#### Training locally 
* Test the container locally with dogs_and_cat.zip 

In [None]:
import boto3
# S3 bucket that holds the training/validation data and receives outputs.
bucket = 'ws-yolov4-yianc'
# Region of the default boto3 session.
region = boto3.session.Session().region_name

In [None]:
from datetime import datetime
# Timestamped job name, e.g. vgg16-2020-11-05-12-02-53.
now = datetime.now()
job_name = f'vgg16-{now.strftime("%Y-%m-%d-%H-%M-%S")}'
job_name

In [None]:
import sagemaker
from sagemaker import get_execution_role
# Resolve the execution role again; this repeats the call made in the setup cell.
role = get_execution_role()

In [None]:

# Build the three S3 URIs (train channel, validation channel, model output)
# from the bucket name, then display them.
train, validation, outpath = (
    template.format(bucket)
    for template in (
        's3://{}/dogs_and_cat/input/data/train/',
        's3://{}/dogs_and_cat/input/data/validation/',
        's3://{}/model/',
    )
)
(train, validation, outpath) 


In [None]:

# Launch the training job with the two S3 prefixes as named input channels.
vgg16_estimator.fit(
    inputs={
        'train': train,
        'validation': validation,
    }
)

In [None]:
# Display the name of the estimator's latest training job.
vgg16_estimator.latest_training_job.name

In [None]:
# Look up the training job by name and read the S3 location of its model
# artifact from the response.
response = client.describe_training_job(TrainingJobName=vgg16_estimator.latest_training_job.name)
model_path = response['ModelArtifacts']['S3ModelArtifacts']

In [None]:
!aws s3 cp s3://sagemaker-us-east-1-230755935769/tensorflow-training-2020-11-05-12-02-53-032/output/model.tar.gz . 
!tar -xvf model.tar.gz
!tar -xvf tlmodel.tar.gz 

In [None]:
# Install the GPU build of TensorFlow 2.2.0 for the inference cells that follow.
# NOTE(review): `!pip` uses whichever pip is first on PATH; `%pip` would target
# the running kernel's environment - confirm which is intended.
!pip install tensorflow-gpu==2.2.0

In [None]:
import tensorflow as tf 

# Load the Keras model from the local 'tlmodel' path.
new_model = tf.keras.models.load_model('tlmodel')

# Check its architecture
new_model.summary()


In [None]:
import os 
import pathlib
# Fetch the filtered cats-vs-dogs archive with tf.keras.utils.get_file
# (extract=True) and locate the extracted folder next to the downloaded zip.
_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
path_to_zip = tf.keras.utils.get_file('cats_and_dogs.zip', origin=_URL, extract=True)
PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')

# Split directories as pathlib.Path objects so they can be globbed later.
train_dir = pathlib.Path(PATH, 'train')
validation_dir = pathlib.Path(PATH, 'validation')

# Image size used when resizing, and the batch size constant.
IMG_SIZE = (160, 160)
BATCH_SIZE = 32


In [None]:
def get_label(file_path):
    """Return the integer class index for the image at ``file_path``.

    The class is taken from the second-to-last path component (the directory
    that contains the file) and matched against the module-level
    ``class_names`` array; the index of the match is returned as int64.
    """
    # Break the path into its components.
    path_components = tf.strings.split(file_path, os.path.sep)
    class_dir = path_components[-2]
    # Boolean match against every known class name, cast to int32 ...
    matches = tf.dtypes.cast(class_names == class_dir, tf.int32)
    # ... and the position of the match is the integer-encoded label.
    return tf.cast(tf.argmax(matches), tf.int64) 


# Resize target used by decode_img. Height and width are taken from the two
# entries of IMG_SIZE in order; previously the width also read index 0, which
# was only correct while IMG_SIZE is square.
img_height, img_width = IMG_SIZE
def decode_img(img):
    """Decode JPEG bytes into a 3-channel image and resize it.

    The target size is ``[img_height, img_width]`` from module scope.
    """
    decoded = tf.image.decode_jpeg(img, channels=3)
    return tf.image.resize(decoded, [img_height, img_width])

def process_path(file_path):
    """Map a file path to an ``(image, label)`` pair.

    The label comes from ``get_label`` and the image from reading the file
    and passing its contents through ``decode_img``. The path is printed
    first.
    """
    print(file_path)
    label = get_label(file_path)
    # Read the raw file contents as a string, then decode and resize.
    raw_bytes = tf.io.read_file(file_path)
    return decode_img(raw_bytes), label


In [None]:
import numpy as np 
def get_dataset(data_dir): 
    """Build a shuffled dataset of file paths found under ``data_dir``.

    Args:
        data_dir: ``pathlib.Path`` of a directory whose sub-directories hold
            the files (one sub-directory per class).

    Returns:
        A ``tf.data.Dataset`` of file-path strings matching
        ``data_dir/*/*``, shuffled once with a buffer sized to the number of
        ``.jpg`` files found.
    """
    image_count = len(list(data_dir.glob('*/*.jpg')))
    print(image_count)
    # List files from the directory that was passed in. The previous version
    # always listed the global train_dir, so the validation dataset was built
    # from training images.
    list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*'), shuffle=False)
    # reshuffle_each_iteration=False keeps the same order on every pass.
    list_ds = list_ds.shuffle(image_count, reshuffle_each_iteration=False)
    return list_ds

# File-path datasets for the two splits.
train_dataset = get_dataset(train_dir)
val_dataset = get_dataset(validation_dir)

# Class names are the sorted entry names directly under train_dir,
# skipping the LICENSE.txt entry.
class_names = np.array(
    sorted(entry.name for entry in train_dir.glob('*') if entry.name != "LICENSE.txt")
)
print(class_names)


In [None]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Turn each file-path dataset into an (image, label) dataset, with the
# parallelism level set to AUTOTUNE.
train_dataset, val_dataset = (
    path_ds.map(process_path, num_parallel_calls=AUTOTUNE)
    for path_ds in (train_dataset, val_dataset)
)

In [None]:
# Batch size read by configure_for_performance; same value as the BATCH_SIZE
# constant defined earlier in the notebook.
batch_size = 32

def configure_for_performance(ds):
    """Return ``ds`` with cache, shuffle, batch and prefetch applied in order.

    Uses a shuffle buffer of 1000 elements, the module-level ``batch_size``
    for batching, and ``AUTOTUNE`` as the prefetch buffer size.
    """
    return (
        ds.cache()
        .shuffle(buffer_size=1000)
        .batch(batch_size)
        .prefetch(buffer_size=AUTOTUNE)
    )

# Apply the same cache/shuffle/batch/prefetch pipeline to both splits.
train_dataset, val_dataset = (
    configure_for_performance(split_ds)
    for split_ds in (train_dataset, val_dataset)
)

In [None]:
import matplotlib.pyplot as plt
# Take one batch from the validation pipeline as NumPy arrays and run the
# loaded model on it.
image_batch, label_batch = next(val_dataset.as_numpy_iterator())
predictions = new_model.predict_on_batch(image_batch).flatten()

# Apply a sigmoid since our model returns logits, then threshold at 0.5 to
# get a 0/1 class index per image.
predictions = tf.where(tf.nn.sigmoid(predictions) < 0.5, 0, 1)

print('Predictions:\n', predictions.numpy())
print('Labels:\n', label_batch)

# Show the first nine images in a 3x3 grid, titled with the predicted class.
plt.figure(figsize=(10, 10))
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(image_batch[i].astype("uint8"))
    plt.title(class_names[predictions[i]])
    plt.axis("off")