In [None]:
!pip install tqdm bing-image-downloader --upgrade

In [None]:
import sagemaker
import boto3
import os
import urllib.request
import tarfile
from tqdm import tqdm
from sagemaker import get_execution_role

# Session, execution role and bucket shared by every later cell.
sess = sagemaker.Session()
role = get_execution_role()
bucket = sess.default_bucket()
prefix = "AIMLwithAWS/Chapter5"  # key prefix for everything this notebook writes to S3

for summary_line in (f"IAM Role: {role}", f"S3 Bucket {bucket}"):
    print(summary_line)

In [None]:
## Look up the SageMaker managed image-classification container for the session's region
training_image = sagemaker.image_uris.retrieve(
    framework="image-classification",
    region=sess.boto_region_name,
)
print("Container: {}".format(training_image))

In [None]:
## Download our data set from the sagemaker-sample-files bucket and expand (with a progress meter)
def download(url):
    """Fetch ``url`` into the working directory unless it is already there.

    The local file name is the last ``/``-separated component of ``url``.
    If a file with that name already exists nothing is downloaded.
    """
    local_name = url.rsplit("/", 1)[-1]
    if os.path.exists(local_name):
        return
    urllib.request.urlretrieve(url, local_name)

# Copy the Caltech-101 archive from the sagemaker-sample-files bucket to a local file
s3 = boto3.client("s3")
s3.download_file(
    Bucket="sagemaker-sample-files",
    Key="datasets/image/caltech-101/101_ObjectCategories.tar.gz",
    Filename="ObjectCategories.tar.gz",
)

## This command can take ~3 minutes to finish
with tarfile.open(name="ObjectCategories.tar.gz") as tar:
    archive_members = tar.getmembers()
    # Extract member by member so tqdm can show progress
    for member in tqdm(iterable=archive_members, total=len(archive_members)):
        tar.extract(member=member)

## We can also download a provided script that we can use to convert images to RecordIO format
download("https://raw.githubusercontent.com/apache/incubator-mxnet/master/tools/im2rec.py")

In [None]:
## For the purposes of this example, we only use the first few categories:
## remove every entry of 101_ObjectCategories/ whose name starts with c-z or A-Z.
##
## This is done in Python rather than with a shell glob because category entries
## are directories (a plain `rm -f` cannot remove them) and because the meaning
## of ranges such as [A-Z] in shell globs depends on the active locale.
import shutil

dataset_root = "101_ObjectCategories"  # the directory the following cells read from

# Materialise the listing first so entries are not deleted while iterating.
for entry in list(os.scandir(dataset_root)):
    initial = entry.name[0]
    if "c" <= initial <= "z" or "A" <= initial <= "Z":
        if entry.is_dir(follow_symlinks=False):
            shutil.rmtree(entry.path)
        else:
            os.remove(entry.path)

In [None]:
%%bash

# Move 30 randomly chosen .jpg images per category out of 101_ObjectCategories/
# into caltech_101_train/<category>/. Whatever stays behind in
# 101_ObjectCategories/ is what the later cells list and upload as validation data.
mkdir -p caltech_101_train

for category_dir in 101_ObjectCategories/*; do
    c=$(basename "$category_dir")
    mkdir -p "caltech_101_train/$c"

    # Only top the category up to 30 images, so that re-running this cell does
    # not keep draining images from the source directory.
    have=$(find "caltech_101_train/$c" -maxdepth 1 -type f -name '*.jpg' ! -name '.*' | wc -l)
    need=$((30 - have))
    if [ "$need" -le 0 ]; then
        continue
    fi

    # find + read (instead of parsing `ls` output) keeps unusual file names intact.
    find "$category_dir" -maxdepth 1 -type f -name '*.jpg' ! -name '.*' | shuf -n "$need" |
        while IFS= read -r img; do
            mv "$img" "caltech_101_train/$c/"
        done
done

In [None]:
# Run the downloaded im2rec.py in --list mode over caltech_101_train/ (recursing
# into the per-category sub-directories) with the output prefix caltech-101-train.
# A later cell reads ./caltech-101-train.lst, which this step is expected to produce.
# The script's console output is piped through sort for display.
!python im2rec.py --list --recursive caltech-101-train caltech_101_train/ | sort

In [None]:
# Same listing step for the validation data: 101_ObjectCategories/ now holds the
# images that were not moved into caltech_101_train/. The output prefix is
# caltech-101-val; a later cell uploads caltech-101-val.lst.
!python im2rec.py --list --recursive caltech-101-val 101_ObjectCategories/ | sort

In [None]:
!head -n 3 ./caltech-101-train.lst > example.lst
f = open("example.lst", "r")
lst_content = f.read()
print(lst_content)

In [None]:
## Load our training and validation data to s3 so training and inference can access it

s3train = "s3://{}/{}/train/".format(bucket, prefix)
s3validation = "s3://{}/{}/validation/".format(bucket, prefix)
s3train_lst = "s3://{}/{}/train_lst/".format(bucket, prefix)
s3validation_lst = "s3://{}/{}/validation_lst/".format(bucket, prefix)

!aws s3 cp caltech_101_train $s3train --recursive --quiet
!aws s3 cp 101_ObjectCategories $s3validation --recursive --quiet

!aws s3 cp caltech-101-train.lst $s3train_lst --quiet
!aws s3 cp caltech-101-val.lst $s3validation_lst --quiet

In [None]:
# Run im2rec.py without --list over the validation and training image
# directories, using the same prefixes as the list files created earlier
# (resize 256, quality 90, 16 threads).
# NOTE(review): nothing else in this notebook references the output of this
# step; the training channels below point at the raw images and .lst files
# already uploaded to S3 - confirm whether this cell is still needed.
!python im2rec.py --resize 256 --quality 90 --num-thread 16 caltech-101-val 101_ObjectCategories/
!python im2rec.py --resize 256 --quality 90 --num-thread 16 caltech-101-train caltech_101_train/

In [None]:
## Create our estimator object

# Model artifacts produced by the training job are written under this S3 prefix.
s3_output_location = f"s3://{bucket}/{prefix}/output"

# Only infrastructure settings belong here. Algorithm settings such as
# num_classes are hyperparameters and are supplied via set_hyperparameters();
# passing them to the constructor has no effect on the training job.
ic_estimator = sagemaker.estimator.Estimator(
    training_image,
    role,
    instance_count=1,
    instance_type="ml.p3.2xlarge",
    volume_size=50,
    max_run=360000,
    input_mode="File",
    output_path=s3_output_location,
    sagemaker_session=sess,
)

In [None]:
## Set the hyperparameters for our model

# num_classes and num_training_samples have to describe the *training* channel,
# so they are read from the list file generated for caltech_101_train/ instead
# of being hard-coded. im2rec list rows are tab-separated: index, label, path.
# Labels are numbered from 0, hence the class count is the largest label + 1.
with open("caltech-101-train.lst") as lst_file:
    train_labels = [int(float(row.split("\t")[1])) for row in lst_file if row.strip()]

ic_estimator.set_hyperparameters(
    num_layers=18,
    use_pretrained_model=1,
    image_shape="3,224,224", ##channels, height, width
    num_classes=max(train_labels) + 1,
    mini_batch_size=128,
    epochs=2,
    learning_rate=0.01,
    top_k=2,
    num_training_samples=len(train_labels),
    resize=256,
    precision_dtype="float32",
)

In [None]:
def _image_channel(s3_uri):
    """Build a training input channel for image-format data stored under ``s3_uri``.

    All four channels (images and list files, for train and validation) share
    the same settings. For image-format input the built-in image classification
    algorithm expects the content type ``application/x-image`` on every channel.
    """
    return sagemaker.inputs.TrainingInput(
        s3_uri,
        distribution="FullyReplicated",
        content_type="application/x-image",
        s3_data_type="S3Prefix",
    )


train_data = _image_channel(s3train)
validation_data = _image_channel(s3validation)
train_data_lst = _image_channel(s3train_lst)
validation_data_lst = _image_channel(s3validation_lst)

# Channel name -> input definition, as passed to ic_estimator.fit()
data_types = {
    "train": train_data,
    "validation": validation_data,
    "train_lst": train_data_lst,
    "validation_lst": validation_data_lst,
}

In [None]:
## Start the training job with the four input channels defined above (logs=True is passed to fit()).
## This model requires a GPU to train. If you get an error saying that your service limit is 0 instances,
## go to the quotas in your console and request a limit increase.

ic_estimator.fit(inputs=data_types, logs=True)

In [None]:
# Deploy the trained model to an endpoint backed by one ml.m5.xlarge instance.
# The endpoint is deleted again in the final cell of this notebook.
ic_classifier = ic_estimator.deploy(initial_instance_count=1, instance_type="ml.m5.xlarge")

In [None]:
import glob
from bing_image_downloader import downloader


def _downloaded_image(query):
    """Return the path of the first image saved for ``query`` under ``test/<query>/``.

    The file extension depends on the image that was found (.jpg, .jpeg, .png, ...),
    so the file is located with a glob instead of assuming a fixed extension.

    Raises:
        FileNotFoundError: if no ``Image_1.*`` file exists for the query.
    """
    matches = sorted(glob.glob(os.path.join("test", query, "Image_1.*")))
    if not matches:
        raise FileNotFoundError(f"No image was downloaded for query {query!r}")
    return matches[0]


# Fetch one test image per search term.
for query in ("buddha", "jet", "spaceship"):
    downloader.download(query, limit=1, output_dir="test", adult_filter_off=False, force_replace=False, timeout=60, verbose=False)

test1 = _downloaded_image("buddha")
test2 = _downloaded_image("jet")
test3 = _downloaded_image("spaceship")

In [None]:
import json
import numpy as np
from sagemaker.serializers import IdentitySerializer

# Send the raw bytes of one downloaded test image to the endpoint.
with open(test2, "rb") as f:
    payload = f.read()

ic_classifier.serializer = IdentitySerializer("image/jpeg")
# The response is parsed as JSON and treated as one score per class.
result = json.loads(ic_classifier.predict(payload))

# Position of the highest score
index = int(np.argmax(result))

# Category names in the order of the class indices
object_categories = [
    "accordion",
    "airplanes",
    "anchor",
    "ant",
    "barrel",
    "bass",
    "beaver",
    "binocular",
    "bonsai",
    "brain",
    "brontosaurus",
    "buddha",
    "butterfly",
]

# The model may have been trained with more outputs than there are names in
# the list above; report such an index instead of failing with an IndexError.
if index < len(object_categories):
    label = object_categories[index]
else:
    label = f"<unknown class index {index}>"
print("Label: " + label + "\nConfidence: " + str(round(result[index], 2)))

In [None]:
## Remember to clean up resources when you are finished:
## this deletes the endpoint that was created by the deploy() cell.
ic_classifier.delete_endpoint()