# Surface defect detection with semantic segmentation on Amazon SageMaker

In this notebook, we will train a TensorFlow 2 model with the [U-Net](https://arxiv.org/abs/1505.04597) architecture to detect surface defects in images. We will then compile the model and prepare it for deployment with SageMaker Edge Manager.
For the complete workshop, check out the [repository](https://github.com/aws-samples/amazon-sagemaker-edge-defect-detection-computer-vision).
1. Prerequisites: prepare the dataset (see the data_preparation notebook)
2. Train the model with SageMaker
3. Compile the model with SageMaker Neo
4. Prepare the deployment package for SageMaker Edge Manager

In [None]:
!pip install matplotlib

In [None]:
!pip install -U tensorflow==2.2.0 --quiet

In [None]:
!pip install -U sagemaker --quiet

In [None]:
!pip install -U botocore boto3 awscli --quiet

In [None]:
# Import the SageMaker Python SDK and display the version the kernel picked up.
import sagemaker
sagemaker.__version__

In [None]:
import os
import numpy as np
from glob import glob
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split



In [None]:
import os
import urllib

import boto3
import numpy as np
import pandas as pd
import sagemaker
from sagemaker import Session, get_execution_role
from sagemaker.s3 import S3Uploader

# SageMaker session; its boto region is the single source of truth for `region`.
session = Session()
region = session.boto_region_name

# IAM role this notebook runs under. It is passed as the role for the training,
# compilation and edge-packaging jobs created further down.
role = get_execution_role()

# The bucket name is suffixed with the caller's AWS account ID.
# NOTE(review): nothing in this notebook creates the bucket — presumably it is
# created beforehand (e.g. by the data_preparation notebook); confirm.
sts_client = boto3.client('sts')
account_id = sts_client.get_caller_identity()["Account"]
bucket = 'sm-edge-getting-started-unet-%s' % (account_id)

# Key prefix under which data, model artifacts and compiled output are stored.
prefix = 'segmentation'

In [None]:
# Seed NumPy's and TensorFlow's global random generators with one shared value.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Training configuration constants.
# NOTE(review): the estimator's hyperparameters in this notebook are written as
# literals (epochs=10, batch_size=8, learning_rate=0.0001), so EPOCHS = 100 is
# not the value the training job receives — confirm which one is intended.
IMAGE_SIZE = 256  # presumably the square input edge in pixels (256 is also used for the Neo input shape) — confirm
EPOCHS = 100
BATCH = 8
LR = 1e-4

In [None]:
from glob import glob
def load_data(path, split=0.1):
    """Collect image/mask file paths under ``path`` and split them three ways.

    Files are read from ``<path>/images/*`` and ``<path>/masks/*`` and sorted,
    so the i-th image is paired with the i-th mask.
    NOTE(review): this assumes sorted file names line up between the two
    folders — confirm against the data preparation step.

    Images and masks are split *together* in a single ``train_test_split``
    call, so every image stays paired with its mask in each subset.

    Args:
        path: Dataset root containing ``images/`` and ``masks/`` sub-folders.
        split: Fraction of the whole dataset used for the validation set, and
            the same fraction again for the test set.

    Returns:
        ``(train_x, train_y), (valid_x, valid_y), (test_x, test_y)`` where each
        ``*_x`` is a list of image paths and each ``*_y`` the matching masks.

    Raises:
        ValueError: If no images are found, or if the number of images and
            masks differ (the pairing would be meaningless).
    """
    print(os.path.join(path, "images/*"))
    images = sorted(glob(os.path.join(path, "images/*")))
    masks = sorted(glob(os.path.join(path, "masks/*")))

    if not images:
        raise ValueError("No images found under %s" % os.path.join(path, "images"))
    if len(images) != len(masks):
        raise ValueError(
            "Found %d images but %d masks under %s; every image needs exactly one mask"
            % (len(images), len(masks), path)
        )

    total_size = len(images)
    # Validation and test sets are the same size: `split` of the full dataset.
    holdout_size = int(split * total_size)
    print(total_size)

    # Splitting both lists in one call applies the same shuffle to each.
    train_x, valid_x, train_y, valid_y = train_test_split(
        images, masks, test_size=holdout_size, random_state=42)
    train_x, test_x, train_y, test_y = train_test_split(
        train_x, train_y, test_size=holdout_size, random_state=42)

    return (train_x, train_y), (valid_x, valid_y), (test_x, test_y)

In [None]:
# Root of the preprocessed dataset; load_data() globs its images/ and masks/ sub-folders.
path = './data/kolektor-preprocessed/semantic-segmentation/'
# Split the file paths into train / validation / test lists (load_data's default split is 0.1).
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_data(path)

In [None]:
# Upload every split to s3://<bucket>/<prefix>/data/<split>/<images|masks>,
# one file at a time, in the order listed here.
upload_plan = [
    ('train/images', train_x),
    ('train/masks', train_y),
    ('val/images', valid_x),
    ('val/masks', valid_y),
    ('test/images', test_x),
    ('test/masks', test_y),
]
for subfolder, files in upload_plan:
    destination = 's3://{}/{}/data/{}'.format(bucket, prefix, subfolder)
    for file in files:
        S3Uploader.upload(file, destination)

In [None]:
from sagemaker.inputs import TrainingInput

# Both channels live under the same S3 data root and hold PNG files.
data_root = 's3://{0}/{1}/data'.format(bucket, prefix)
train_input = TrainingInput(data_root + '/train/', content_type='image/png')
val_input = TrainingInput(data_root + '/val/', content_type='image/png')

### Train the TensorFlow model

In [None]:
!pygmentize source_dir/train_tf.py

In [None]:
from sagemaker.tensorflow import TensorFlow

# Directory handed to the training script as its model output location.
model_dir = '/opt/ml/model'

hyperparameters = {
    'epochs': 10,
    'batch_size': 8,
    'learning_rate': 0.0001,
}

estimator = TensorFlow(
    entry_point='train_tf.py',
    source_dir='source_dir',
    role=role,
    instance_count=1,
    instance_type='ml.c5.xlarge',  # use 'local' to run the job on this machine instead
    framework_version='2.2.0',
    py_version='py37',
    script_mode=True,
    model_dir=model_dir,
    hyperparameters=hyperparameters,
    output_path='s3://{}/{}/{}'.format(bucket, prefix, 'tf_model'),
)

# Channel names map to the S3 inputs defined earlier.
inputs = {'train': train_input, 'validation': val_input}

# NOTE(review): the job name is fixed, and SageMaker training job names must be
# unique per account and region — re-running this cell after a previous run
# will be rejected until the name is changed.
pred = estimator.fit(inputs, job_name='unet-segmentation-tf2-5')

In [None]:
# Ask the estimator where the training job wrote its model artifact instead of
# rebuilding the S3 path by hand from a hard-coded job name, so this stays
# correct if the job name or output path of the training cell ever changes.
model_data = estimator.model_data
print('model uploaded to: {}'.format(model_data))

In [None]:
# Show the S3 key prefix used in this notebook's S3 URIs.
print(prefix)

### Compile the model with SageMaker Neo

In [None]:
import sagemaker
import boto3
import time

sm_client = boto3.client('sagemaker')

# Model artifact produced by the training step.
s3_path = model_data

# Millisecond timestamp keeps the job name unique across re-runs.
compilation_job_name = '%s-%d' % ('unet', int(time.time()*1000))

sm_client.create_compilation_job(
    CompilationJobName=compilation_job_name,
    RoleArn=role,
    InputConfig={
        'S3Uri': s3_path,
        # Input tensor name and shape: batch of 1, 3 channels, 256x256.
        'DataInputConfig': '{"input_image":[1,%d,%d,%d]}' % (3,256, 256),
        'Framework': 'KERAS'
    },
    OutputConfig={
        'S3OutputLocation': 's3://%s/%s/optimized/' % (bucket, prefix),
        'TargetPlatform': { 'Os': 'LINUX', 'Arch': 'X86_64' }
        # Alternative targets:
        #'TargetPlatform': { 'Os': 'LINUX', 'Arch': 'ARM64', 'Accelerator': 'NVIDIA' },
        #'CompilerOptions': '{"trt-ver": "7.1.3", "cuda-ver": "10.2", "gpu-code": "sm_53"}'
        #'TargetPlatform': { 'Os': 'LINUX', 'Arch': 'ARM64'},
        #'TargetDevice': 'ml_c5'
    },
    StoppingCondition={ 'MaxRuntimeInSeconds': 900 }
)

# Poll every 5 seconds until the job leaves the STARTING / INPROGRESS states.
while True:
    resp = sm_client.describe_compilation_job(CompilationJobName=compilation_job_name)
    status = resp['CompilationJobStatus']
    if status not in ('STARTING', 'INPROGRESS'):
        break
    print('Running...')
    time.sleep(5)

print(status, compilation_job_name)

# Stop here on anything but success: the edge-packaging step needs a completed
# compilation job, and failing later would hide the real cause.
if status != 'COMPLETED':
    raise RuntimeError(
        'Compilation job %s ended with status %s: %s'
        % (compilation_job_name, status, resp.get('FailureReason', 'no failure reason reported'))
    )

### Package the model with SageMaker Edge Manager 

In [None]:
# Millisecond timestamp keeps the job name unique across re-runs.
edge_packaging_job_name='%s-%d' % ('unet', int(time.time()*1000))
model_version='1.0'
model_name='unet'
num_classes=1

# Package the output of the Neo compilation job for SageMaker Edge Manager.
resp = sm_client.create_edge_packaging_job(
    EdgePackagingJobName=edge_packaging_job_name,
    CompilationJobName=compilation_job_name,
    ModelName=model_name,
    ModelVersion=model_version,
    RoleArn=role,
    OutputConfig={
        'S3OutputLocation': 's3://%s/%s-%dx%d-%d/' % (bucket, prefix, 256, 256, num_classes)
    }
)

# Poll every 5 seconds until the job leaves the STARTING / INPROGRESS states.
while True:
    resp = sm_client.describe_edge_packaging_job(EdgePackagingJobName=edge_packaging_job_name)
    status = resp['EdgePackagingJobStatus']
    if status not in ('STARTING', 'INPROGRESS'):
        break
    print('Running...')
    time.sleep(5)

# Report the packaging job's own name (not the compilation job's).
print(status, edge_packaging_job_name)

# Surface failures instead of letting the notebook continue without a package.
if status != 'COMPLETED':
    raise RuntimeError(
        'Edge packaging job %s ended with status %s: %s'
        % (edge_packaging_job_name, status,
           resp.get('EdgePackagingJobStatusMessage', 'no status message reported'))
    )

In [None]:
# S3 location of the edge deployment package (same URI passed to the packaging job).
# The bucket variable in this notebook is `bucket`; `bucket_name` was never defined.
print('s3://%s/%s-%dx%d-%d/' % (bucket, prefix, 256, 256, num_classes))