In [2]:
! pip install sagemaker botocore boto3 awscli --upgrade

Collecting sagemaker
  Downloading sagemaker-2.102.0.tar.gz (555 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m556.0/556.0 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting botocore
  Downloading botocore-1.27.45-py3-none-any.whl (9.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
Collecting boto3
  Downloading boto3-1.24.45-py3-none-any.whl (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.5/132.5 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting awscli
  Downloading awscli-1.25.45-py3-none-any.whl (3.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.9/3.9 MB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Collecting rsa<4.8,>=3.1.2
  Downloading rsa-4.7.2-py3-none-any.whl (34 kB)
Collecting PyYAML<5.5,>=3.10
  Downloading Py

In [3]:
%%time
import boto3
import sagemaker
from sagemaker import get_execution_role

role = get_execution_role()
print(role)

region = boto3.Session().region_name

s3_client = boto3.client("s3")

sess = sagemaker.Session()

output_bucket = sess.default_bucket()
output_prefix = "ic-transfer-learning"

# download the files
!curl  https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/caltech-256/caltech-256-60-train.rec > ./caltech-256-60-train.rec
!curl  https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/caltech-256/caltech-256-60-val.rec > ./caltech-256-60-val.rec

s3_client.upload_file(
    "caltech-256-60-train.rec", output_bucket, output_prefix + "/train_rec/caltech-256-60-train.rec"
)

s3_client.upload_file(
    "caltech-256-60-train.rec",
    output_bucket,
    output_prefix + "/validation_rec/caltech-256-60-train.rec",
)

arn:aws:iam::549665692215:role/service-role/AmazonSageMaker-ExecutionRole-20220725T205814
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  346M  100  346M    0     0  17.9M      0  0:00:19  0:00:19 --:--:-- 20.2M
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  343M  100  343M    0     0  15.8M      0  0:00:21  0:00:21 --:--:-- 17.4M
CPU times: user 5.54 s, sys: 4.27 s, total: 9.81 s
Wall time: 50 s


In [4]:
from sagemaker import image_uris

# Look up the container image URI of the "image-classification" algorithm
# for the region the SageMaker session is bound to.
training_image = image_uris.retrieve(
    framework="image-classification",
    region=sess.boto_region_name,
    version="latest",
)

print(training_image)

Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: latest.


501404015308.dkr.ecr.ap-northeast-1.amazonaws.com/image-classification:1


In [5]:
# S3 prefixes of the two input channels used below: training and validation data.
s3train = "s3://{}/{}/train_rec/".format(output_bucket, output_prefix)
s3validation = "s3://{}/{}/validation_rec/".format(output_bucket, output_prefix)

In [11]:
# S3 location handed to the estimator as its output path.
s3_output_location = "s3://{}/{}/output".format(output_bucket, output_prefix)

# Estimator wrapping the image-classification training container:
# a single ml.p2.xlarge instance, File input mode, results written to
# s3_output_location.
ic_estimator = sagemaker.estimator.Estimator(
    image_uri=training_image,
    role=role,
    sagemaker_session=sess,
    instance_type="ml.p2.xlarge",
    instance_count=1,
    volume_size=50,
    max_run=360000,
    input_mode="File",
    output_path=s3_output_location,
)

In [12]:
# Hyperparameters for the training job, grouped by what they describe.
ic_estimator.set_hyperparameters(
    # Network: 18 layers with the pretrained-model flag switched on.
    use_pretrained_model=1,
    num_layers=18,
    # Dataset: input image shape, number of classes and number of training samples.
    image_shape="3,224,224",
    num_classes=257,
    num_training_samples=15420,
    # Optimisation: batch size, number of epochs, learning rate and numeric precision.
    mini_batch_size=128,
    epochs=2,
    learning_rate=0.01,
    precision_dtype="float32",
)

In [13]:
# Both channels share the same settings and differ only in their S3 prefix:
# RecordIO content, every object under the prefix, fully replicated.
train_data, validation_data = (
    sagemaker.inputs.TrainingInput(
        s3_data=channel_uri,
        distribution="FullyReplicated",
        content_type="application/x-recordio",
        s3_data_type="S3Prefix",
    )
    for channel_uri in (s3train, s3validation)
)

# Channel name -> input definition, as passed to the estimator's fit().
data_channels = dict(train=train_data, validation=validation_data)

In [14]:
# Launch the training job on both channels and stream its logs into the cell output.
ic_estimator.fit(data_channels, logs=True)

2022-08-04 07:45:33 Starting - Starting the training job...
2022-08-04 07:45:59 Starting - Preparing the instances for trainingProfilerReport-1659599133: InProgress
............
2022-08-04 07:47:57 Downloading - Downloading input data............
2022-08-04 07:49:58 Training - Downloading the training image..[34mDocker entrypoint called with argument(s): train[0m
[34m[08/04/2022 07:50:15 INFO 139693006374720] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/image_classification/default-input.json: {'use_pretrained_model': 0, 'num_layers': 152, 'epochs': 30, 'learning_rate': 0.1, 'lr_scheduler_factor': 0.1, 'optimizer': 'sgd', 'momentum': 0, 'weight_decay': 0.0001, 'beta_1': 0.9, 'beta_2': 0.999, 'eps': 1e-08, 'gamma': 0.9, 'mini_batch_size': 32, 'image_shape': '3,224,224', 'precision_dtype': 'float32'}[0m
[34m[08/04/2022 07:50:15 INFO 139693006374720] Merging with provided configuration from /opt/ml/input/config/hyperparameters.json: {'epochs': '2', 'imag

In [17]:
from time import gmtime, strftime

# A UTC timestamp suffix makes the model name different on every run.
timestamp_suffix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
model_name = f"DEMO-clarify-image-classification-model-{timestamp_suffix}"

# Build the model object from the trained estimator and derive its container definition.
model = ic_estimator.create_model(name=model_name)
container_def = model.prepare_container_def()

# Create the model through the session; the call's return value is the cell output.
sess.create_model(name=model_name, role=role, container_defs=container_def)

'DEMO-clarify-image-classification-model-2022-08-04-07-57-45'

In [20]:
# Load the category labels, one per line, stripping the trailing newline.
# The file is only read, so open it read-only ("r") instead of read/write ("r+"),
# which would needlessly require write permission on the file.
with open("caltech_256_object_categories.txt", "r") as object_categories_file:
    object_categories = [category.rstrip("\n") for category in object_categories_file]

# Show the first 10 entries of the object_categories list
object_categories[:10]

['001.ak47',
 '002.american-flag',
 '003.backpack',
 '004.baseball-bat',
 '005.baseball-glove',
 '006.basketball-hoop',
 '007.bat',
 '008.bathtub',
 '009.bear',
 '010.beer-mug']

In [15]:
prefix = "sagemaker/DEMO-sagemaker-clarify-cv"
file_name_map = {
    "167.pyramid/167_0002.jpg": "pyramid.jpg",
    "038.chimp/038_0013.jpg": "chimp.jpg",
    "124.killer-whale/124_0013.jpg": "killer-whale.jpg",
    "170.rainbow/170_0001.jpg": "rainbow.jpg",
}


!curl  https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/caltech-256/256_ObjectCategories/167.pyramid/167_0002.jpg > ./pyramid.jpg
!curl  https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/caltech-256/256_ObjectCategories/038.chimp/038_0013.jpg > ./chimp.jpg
!curl  https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/caltech-256/256_ObjectCategories/124.killer-whale/124_0013.jpg > ./killer-whale.jpg
!curl  https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/caltech-256/256_ObjectCategories/038.chimp/038_0013.jpg > ./chimp.jpg
!curl  https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/caltech-256/256_ObjectCategories/170.rainbow/170_0001.jpg > ./rainbow.jpg

for file_name in file_name_map:
    s3_client.upload_file(
        file_name_map[file_name], output_bucket, f"{prefix}/{file_name_map[file_name]}"
    )

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 54052  100 54052    0     0  68681      0 --:--:-- --:--:-- --:--:-- 68681
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 37880  100 37880    0     0  48316      0 --:--:-- --:--:-- --:--:-- 48316
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 19076  100 19076    0     0  24741      0 --:--:-- --:--:-- --:--:-- 24741
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 37880  100 37880    0     0  50172      0 --:--:-- --:--:-- --:--:-- 50172
  % Total    % Received % Xferd  Average Speed   Tim

In [21]:
from sagemaker import clarify

# Input images are read from <bucket>/<prefix>/; results are written to
# <bucket>/<prefix>/cv_analysis_result.
# NOTE(review): the result path is nested inside the input path; confirm that
# artifacts of a previous run are not picked up as input images on re-run.
s3_data_input_path = f"s3://{output_bucket}/{prefix}/"
clarify_output_prefix = "{}/cv_analysis_result".format(prefix)
analysis_result_path = f"s3://{output_bucket}/{clarify_output_prefix}"

# Dataset description: image input, with input and output S3 locations.
explainability_data_config = clarify.DataConfig(
    dataset_type="application/x-image",
    s3_data_input_path=s3_data_input_path,
    s3_output_path=analysis_result_path,
)

# The model created earlier, served on one ml.m5.xlarge instance and fed JPEG payloads.
model_config = clarify.ModelConfig(
    model_name=model_name,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    content_type="image/jpeg",
)

# The category names loaded from the categories file serve as label headers.
predictions_config = clarify.ModelPredictedLabelConfig(label_headers=object_categories)

# Image-specific settings: classification model, 20 segments, compactness 5.
image_config = clarify.ImageConfig(
    model_type="IMAGE_CLASSIFICATION",
    num_segments=20,
    segment_compactness=5,
)

# SHAP configuration with 500 samples, using the image settings above.
shap_config = clarify.SHAPConfig(image_config=image_config, num_samples=500)

In [22]:
import os

# Placeholder values, used only to build a role ARN when the execution role
# lookup below raises ValueError.
sagemaker_iam_role = "<AmazonSageMaker-ExecutionRole>"
account_id = os.getenv("AWS_ACCOUNT_ID", "<your-account-id>")

# Resolve the IAM role for the processing job; on ValueError, print the error
# and fall back to an ARN assembled from the placeholders.
try:
    role = sagemaker.get_execution_role()
except ValueError as err:
    print(err)
    role = "arn:aws:iam::{}:role/{}".format(account_id, sagemaker_iam_role)

# Processor that will run the Clarify job on a single ml.m5.xlarge instance.
clarify_processor = clarify.SageMakerClarifyProcessor(
    role=role,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    sagemaker_session=sess,
)

In [23]:
# Start the explainability job with the data, model, label and SHAP
# configurations built above.
clarify_processor.run_explainability(
    data_config=explainability_data_config,
    model_config=model_config,
    model_scores=predictions_config,
    explainability_config=shap_config,
)


Job Name:  Clarify-Explainability-2022-08-04-08-27-54-835
Inputs:  [{'InputName': 'dataset', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-ap-northeast-1-549665692215/sagemaker/DEMO-sagemaker-clarify-cv/', 'LocalPath': '/opt/ml/processing/input/data', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'analysis_config', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-ap-northeast-1-549665692215/sagemaker/DEMO-sagemaker-clarify-cv/cv_analysis_result/analysis_config.json', 'LocalPath': '/opt/ml/processing/input/config', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'analysis_result', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-ap-northeast-1-549665692215/sagemaker/DEMO-sagemaker-clarify-cv/cv_analysis_result', 'LocalPath': '/opt/ml/processing/output', 'S3Upload

In [24]:
%%time
output_objects = s3_client.list_objects(Bucket=output_bucket, Prefix=clarify_output_prefix)
result_images = []

for file_obj in output_objects["Contents"]:
    file_name = os.path.basename(file_obj["Key"])
    if os.path.splitext(file_name)[1] == ".jpeg":
        result_images.append(file_name)

    print(f"Downloading s3://{output_bucket}/{file_obj['Key']} ...")
    s3_client.download_file(output_bucket, file_obj["Key"], file_name)

Downloading s3://sagemaker-ap-northeast-1-549665692215/sagemaker/DEMO-sagemaker-clarify-cv/cv_analysis_result/analysis_config.json ...
Downloading s3://sagemaker-ap-northeast-1-549665692215/sagemaker/DEMO-sagemaker-clarify-cv/cv_analysis_result/report.html ...
Downloading s3://sagemaker-ap-northeast-1-549665692215/sagemaker/DEMO-sagemaker-clarify-cv/cv_analysis_result/report.ipynb ...
Downloading s3://sagemaker-ap-northeast-1-549665692215/sagemaker/DEMO-sagemaker-clarify-cv/cv_analysis_result/report.pdf ...
Downloading s3://sagemaker-ap-northeast-1-549665692215/sagemaker/DEMO-sagemaker-clarify-cv/cv_analysis_result/shap_chimp.jpeg ...
Downloading s3://sagemaker-ap-northeast-1-549665692215/sagemaker/DEMO-sagemaker-clarify-cv/cv_analysis_result/shap_killer-whale.jpeg ...
Downloading s3://sagemaker-ap-northeast-1-549665692215/sagemaker/DEMO-sagemaker-clarify-cv/cv_analysis_result/shap_pyramid.jpeg ...
Downloading s3://sagemaker-ap-northeast-1-549665692215/sagemaker/DEMO-sagemaker-clarify-

In [25]:
%%time

# Clean up: delete the SageMaker model behind `model`, the object returned by
# ic_estimator.create_model() earlier in the notebook.
model.delete_model()

CPU times: user 16.2 ms, sys: 0 ns, total: 16.2 ms
Wall time: 157 ms
