In [None]:
# Name of the S3 bucket that holds the image data
data_bucket_name = "plantshealth"

# Prefix inside that bucket; its sub-folders hold the images, one sub-folder per label class
dataset_name = "Apple"

In [None]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri

# The session supplies the region that the algorithm image is looked up for.
sess = sagemaker.Session()
role = get_execution_role()

aws_region = sess.boto_region_name
training_image = get_image_uri(
    aws_region,
    'image-classification',
    repo_version="latest",
)

In [None]:
# Find im2rec in our environment and set up some other vars in our environemnt

base_dir='/tmp'

%env BASE_DIR=$base_dir
%env S3_DATA_BUCKET_NAME = $data_bucket_name
%env DATASET_NAME = $dataset_name

import sys,os

suffix='/mxnet/tools/im2rec.py'
im2rec = list(filter( (lambda x: os.path.isfile(x + suffix )), sys.path))[0] + suffix
%env IM2REC=$im2rec

In [None]:
%%bash
# Use the IM2REC script to convert our images into RecordIO files.
# Reads BASE_DIR, DATASET_NAME and IM2REC from the environment.

# Work inside BASE_DIR; stop if it cannot be entered so the cleanup below
# never deletes files from some other directory.
cd "$BASE_DIR" || exit 1

# Clean up our working dir of existing LST and REC files
# (-f: stay quiet when there is nothing to delete, e.g. on the first run)
rm -f -- *.rec *.lst

# First we need to create two LST files (training and test lists), noting the correct label class for each image
# We'll also save the output of the LST files command, since it includes a list of all of our label classes
echo "Creating LST files"
python "$IM2REC" --list --recursive --pass-through --test-ratio=0.3 --train-ratio=0.7 "$DATASET_NAME" "$DATASET_NAME" > "${DATASET_NAME}_classes"

echo "Label classes:"
cat "${DATASET_NAME}_classes"

# Then we create RecordIO files from the LST files
echo "Creating RecordIO files"
python "$IM2REC" --num-thread=4 "${DATASET_NAME}_train.lst" "$DATASET_NAME"
python "$IM2REC" --num-thread=4 "${DATASET_NAME}_test.lst" "$DATASET_NAME"
ls -lh -- *.rec

In [None]:
# Upload our train and test RecordIO files to S3 in the bucket that our sagemaker session is using
bucket = sess.default_bucket()

s3train_path = 's3://{}/{}/train/'.format(bucket, dataset_name)
s3validation_path = 's3://{}/{}/validation/'.format(bucket, dataset_name)

# Clean up any existing data
!aws s3 rm s3://{bucket}/{dataset_name}/train --recursive
!aws s3 rm s3://{bucket}/{dataset_name}/validation --recursive

# Upload the rec files to the train and validation channels
!aws s3 cp /tmp/{dataset_name}_train.rec $s3train_path
!aws s3 cp /tmp/{dataset_name}_test.rec $s3validation_path

In [None]:
# Both channels use the same input settings; only the S3 prefix differs.
recordio_settings = dict(
    distribution='FullyReplicated',
    content_type='application/x-recordio',
    s3_data_type='S3Prefix',
)

train_data = sagemaker.session.s3_input(s3train_path, **recordio_settings)
validation_data = sagemaker.session.s3_input(s3validation_path, **recordio_settings)

# Channel name -> input definition
data_channels = dict(train=train_data, validation=validation_data)

In [None]:
# S3 location handed to the estimator as its output_path
s3_output_location = 's3://{}/{}/output'.format(bucket, dataset_name)

# Training resources and output settings for the estimator
estimator_settings = dict(
    train_instance_count=1,
    train_instance_type='ml.p3.2xlarge',
    output_path=s3_output_location,
    sagemaker_session=sess,
)

image_classifier = sagemaker.estimator.Estimator(training_image, role, **estimator_settings)

In [None]:
import os

# Count the label classes: one sub-folder of base_dir/dataset_name per class.
# Counting directories directly avoids the "ls -l | wc -l, minus the total line"
# arithmetic and ignores stray files and hidden folders (e.g. .ipynb_checkpoints).
dataset_dir = os.path.join(base_dir, dataset_name)
num_classes = sum(
    1 for entry in os.scandir(dataset_dir)
    if entry.is_dir() and not entry.name.startswith('.')
)

# The training LST file has one line per training image.
train_lst_path = os.path.join(base_dir, '{}_train.lst'.format(dataset_name))
with open(train_lst_path) as train_lst:
    num_training_samples = sum(1 for _ in train_lst)

# Learn more about the Sagemaker built-in Image Classifier hyperparameters here: https://docs.aws.amazon.com/sagemaker/latest/dg/IC-Hyperparameter.html

# These hyperparameters we won't want to change, as they define things like
# the size of the images we'll be sending for input, the number of training classes we have, etc.
base_hyperparameters=dict(
    use_pretrained_model=1,
    image_shape='3,256,256',
    num_classes=num_classes,
    num_training_samples=num_training_samples,
)

# These are hyperparameters we may want to tune, as they can affect the model training success:
hyperparameters = dict(
    base_hyperparameters,
    learning_rate=0.001,
    mini_batch_size=5,
    epoch=30,
)

image_classifier.set_hyperparameters(**hyperparameters)

# Last expression of the cell: show the final hyperparameter set
hyperparameters

In [None]:
# create the Amazon SageMaker training job
sagemaker = boto3.client(service_name='sagemaker')
sagemaker.create_training_job(**training_params)
training_job_name = 'plantshealth' + dataset_name.replace('_', '-') + '-' + model
# confirm that the training job has started
status = sagemaker.describe_training_job(TrainingJobName=job_name)['TrainingJobStatus']
print('Training job current status: {}'.format(status))

try:
    # wait for the job to finish and report the ending status
    sagemaker.get_waiter('training_job_completed_or_stopped').wait(TrainingJobName=job_name)
    training_info = sagemaker.describe_training_job(TrainingJobName=job_name)
    status = training_info['TrainingJobStatus']
    print("Training job ended with status: " + status)
except:
    print('Training failed to start')
     # if exception is raised, that means it has failed
    message = sagemaker.describe_training_job(TrainingJobName=job_name)['FailureReason']
    print('Training failed with the following error: {}'.format(message))

In [None]:
training_info = sagemaker.describe_training_job(TrainingJobName=job_name)
status = training_info['TrainingJobStatus']
print("Training job ended with status: " + status)

In [None]:
# boto3 is the AWS SDK for Python. It lets Python code create, configure, and manage AWS services.
# get_execution_role() returns the IAM role that was passed in as part of the notebook creation.
# get_image_uri() returns the URI of the container image for a built-in SageMaker algorithm.
import boto3
from time import gmtime, strftime
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri

role = get_execution_role()
sess = sagemaker.Session()

sage = boto3.Session().client(service_name='sagemaker') 


model_name="plantshealth-model"
print(model_name)

# Look up the training job by name. boto3 client methods only accept keyword
# arguments, and `job_name` is the name the training cell uses for the job.
info = sage.describe_training_job(TrainingJobName=job_name)
# Model artifacts are the output that results from training a model, and typically consist of trained parameters,
# a model definition that describes how to compute inferences, and other metadata.
model_data = info['ModelArtifacts']['S3ModelArtifacts']
print(model_data)
# hosting_image is the URI of the image-classification container used to serve the model
hosting_image = get_image_uri(boto3.Session().region_name, 'image-classification')

primary_container = {
    'Image': hosting_image,
    'ModelDataUrl': model_data,
}

create_model_response = sage.create_model(
    ModelName = model_name,
    ExecutionRoleArn = role,
    PrimaryContainer = primary_container)

print(create_model_response['ModelArn'])

In [None]:
# Create an endpoint configuration with Amazon Elastic Inference
from time import gmtime, strftime

endpoint_config_name = 'plantshealth-endconfig'

# A production variant identifies a model to host and the resources to deploy for hosting it.
# When several models are deployed, variant weights tell Amazon SageMaker how to
# distribute traffic among them.
all_traffic_variant = {
    'InstanceType': 'ml.m4.xlarge',
    'InitialInstanceCount': 1,           # number of instances to launch initially
    'ModelName': model_name,
    'AcceleratorType': 'ml.eia1.large',  # Elastic Inference accelerator type for this variant
    'VariantName': 'AllTraffic',
}

endpoint_config_response = sage.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[all_traffic_variant],
)

endpoint_config_arn = endpoint_config_response['EndpointConfigArn']
print('Endpoint configuration name: {}'.format(endpoint_config_name))
print('Endpoint configuration arn:  {}'.format(endpoint_config_arn))


In [None]:
%%time
import time
sagemaker = boto3.client('sagemaker')

endpoint_name = 'plantshealth-endpoint'
print('Endpoint name: {}'.format(endpoint_name))

endpoint_params = {
    'EndpointName': endpoint_name,
    'EndpointConfigName': endpoint_config_name,
}
endpoint_response = sagemaker.create_endpoint(**endpoint_params)
print('EndpointArn = {}'.format(endpoint_response['EndpointArn']))

In [None]:
# Perform inference
# Create the boto3 client that the inference cell sends requests through
import boto3

boto_session = boto3.Session()
runtime = boto_session.client(service_name='runtime.sagemaker')


In [None]:
#getfile
!wget -O /tmp/test.jpg http://ourtestimages.s3-website-us-east-1.amazonaws.com/cornhealthy.jpg 

file_name = '/tmp/test.jpg'
# test image
from IPython.display import Image
Image(file_name)  


In [None]:
# Run inference on the downloaded test image
import json
import numpy as np

endpoint_name = 'plantshealth-endpoint'


with open(file_name, 'rb') as f:
    payload = bytearray(f.read())
response = runtime.invoke_endpoint(EndpointName=endpoint_name, 
                                   ContentType='application/x-image', 
                                   Body=payload)
result = response['Body'].read()
# result is JSON; decode it into a list of probabilities, one per class
result = json.loads(result)
# find the class with maximum probability
index = int(np.argmax(result))

# One label per class. Every entry needs its own trailing comma: adjacent string
# literals without a comma are silently concatenated into a single label, which
# shifts every later index.
# NOTE(review): the position of each label must equal the class index the model
# was trained with - confirm this order against the label classes printed when
# the LST files were created.
object_categories = [
    'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot', 'Corn_(maize)___Common_rust_', 'Corn_(maize)___healthy', 'Corn_(maize)___Northern_Leaf_Blight',
    'Apple___Apple_scab', 'Apple___Black_rot', 'Apple___Cedar_apple_rust', 'Apple___healthy',
    'Blueberry___healthy',
    'Cherry_(including_sour)___Powdery_mildew', 'Cherry_(including_sour)___healthy',
    'Grape___Black_rot', 'Grape___Esca_(Black_Measles)', 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)', 'Grape___healthy',
    'Orange___Haunglongbing_(Citrus_greening)',
    'Peach___Bacterial_spot',
    'Peach___healthy', 'Pepper,_bell___Bacterial_spot', 'Pepper,_bell___healthy',
    'Potato___Early_blight', 'Potato___Late_blight', 'Potato___healthy',
    'Raspberry___healthy',
    'Soybean___healthy',
    'Squash___Powdery_mildew',
    'Strawberry___Leaf_scorch', 'Strawberry___healthy',
    'Tomato___Bacterial_spot', 'Tomato___Early_blight', 'Tomato___Late_blight', 'Tomato___Leaf_Mold', 'Tomato___Septoria_leaf_spot',
    'Tomato___Spider_mites Two-spotted_spider_mite', 'Tomato___Target_Spot', 'Tomato___Tomato_Yellow_Leaf_Curl_Virus',
    'Tomato___Tomato_mosaic_virus', 'Tomato___healthy',
]

if len(result) == len(object_categories):
    print("Result: label - " + object_categories[index] + ", probability - " + str(result[index]))
else:
    # The label list does not describe this model; printing a name would be misleading
    print("Result: class index - {}, probability - {}".format(index, result[index]))
    print("Warning: the endpoint returned {} probabilities but object_categories lists {} labels, "
          "so the index cannot be mapped to a label name.".format(len(result), len(object_categories)))