<a href="https://colab.research.google.com/github/benjaminbrown038/Amazon/blob/main/notebooks/amazon/amazon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Amazon

## Computer Vision

- Image Classification
- Object Detection
- Semantic Segmentation
- Instance Segmentation
- Image Embedding

## Text

- Text Classification
- Sentence Pair Classification
- Question Answering
- Named Entity Recognition
- Text Summarization
- Text Generation
- Machine Translation
- Text Embedding


## Tabular

- Tabular Classification (LightGBM & CatBoost)
- Tabular Classification (XGBoost & Scikit-learn Linear Learner)
- Tabular Classification (AutoGluon)
- Tabular Classification (TabTransformer)
- Tabular Regression (LightGBM & CatBoost)
- Tabular Regression (XGBoost & Scikit-learn Linear Learner)
- Tabular Regression (AutoGluon)
- Tabular Regression (TabTransformer)

## Image Classification

In [None]:
!pip3 install sagemaker ipywidgets --upgrade --quiet
import sagemaker, boto3, json, IPython

from sagemaker import get_execution_role, image_uris, model_uris, script_uris, hyperparameters
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base
from sagemaker.estimator import Estimator
from sagemaker.tuner import HyperparameterTuner, ContinuousParameter

from IPython.core.display import HTML
import ipywidgets as widgets

In [None]:
# Resolve the execution role, the region of the default boto3 session, and a
# SageMaker session; later cells read all three.
aws_role = get_execution_role()
aws_region = boto3.Session().region_name
sess = sagemaker.Session()

In [None]:
# Default JumpStart model; "*" is used as the version value for this model.
model_id = "pytorch-ic-mobilenet-v2"
model_version = "*"

In [None]:
model_id

In [None]:
model_version

In [None]:
# Download models_manifest.json from the region's jumpstart-cache-prod bucket into the working directory.
boto3.client("s3").download_file(f"jumpstart-cache-prod-{aws_region}", "models_manifest.json", "models_manifest.json")

In [None]:
# Parse the downloaded manifest; the parsed object is echoed as the cell output.
with open("models_manifest.json", "rb") as json_file:
    model_list = json.load(json_file)
model_list

In [None]:
# Every manifest entry whose model id contains "-ic-"; the same id can appear
# more than once (presumably one entry per published version — confirm against the manifest).
ic_models_all_versions = [model["model_id"] for model in model_list if "-ic-" in model["model_id"]]
# De-duplicate while keeping first-seen order (dict keys preserve insertion order).
ic_models = list(dict.fromkeys(ic_models_all_versions))

In [None]:
ic_models_all_versions

In [None]:
ic_models

In [None]:
# Dropdown over the de-duplicated model ids, pre-selecting model_id.
# NOTE(review): presumably model_id has to be one of `options` — confirm with ipywidgets docs.
dropdown = widgets.Dropdown(
    options=ic_models,
    value=model_id,
    description="JumpStart Image Classification Models:",
    style={"description_width": "initial"},
    layout={"width": "max-content"})
dropdown

In [None]:
display(IPython.display.Markdown("## Select a JumpStart pre-trained model from the dropdown below"))

In [None]:
display(dropdown)

In [None]:
# Take the current dropdown selection; "*" is passed as the version to the retrieve calls below.
infer_model_id, infer_model_version = dropdown.value, "*"

In [None]:
infer_model_id

In [None]:
infer_model_version

In [None]:
# Endpoint name derived from the selected model id via name_from_base.
endpoint_name = name_from_base(f"jumpstart-example-{infer_model_id}")
endpoint_name

In [None]:
inference_instance_type = "ml.m5.xlarge"
inference_instance_type

In [None]:
# Inference-scope container image URI for the selected model and instance type.
deploy_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="inference",
    model_id=infer_model_id,
    model_version=infer_model_version,
    instance_type=inference_instance_type)
deploy_image_uri

In [None]:
# Inference-scope script URI for the selected model.
deploy_source_uri = script_uris.retrieve(model_id=infer_model_id, model_version=infer_model_version, script_scope="inference")
deploy_source_uri

In [None]:
# Inference-scope model artifact URI for the selected model.
base_model_uri = model_uris.retrieve(model_id=infer_model_id, model_version=infer_model_version, model_scope="inference")

# Build the deployable model from the image, script and artifact URIs retrieved
# above. The deploy cell that follows calls `model.deploy(...)`, so `model`
# has to be bound here.
model = Model(
    image_uri=deploy_image_uri,
    source_dir=deploy_source_uri,
    model_data=base_model_uri,
    entry_point="inference.py",
    role=aws_role,
    predictor_cls=Predictor,
    name=endpoint_name)
base_model_uri

In [None]:
# Deploy `model` to an endpoint named endpoint_name with one instance of
# inference_instance_type; the returned predictor is used for the queries below.
base_model_predictor = model.deploy(
    initial_instance_count=1,
    instance_type=inference_instance_type,
    endpoint_name=endpoint_name)
base_model_predictor

In [None]:
# Bucket and key prefix read by download_from_s3 at call time.
s3_bucket = f"jumpstart-cache-prod-{aws_region}"
s3_bucket

In [None]:
key_prefix = "inference-notebook-assets"
key_prefix

In [None]:
def download_from_s3(images):
    """Download each S3 object named in ``images`` to a local file.

    Args:
        images: Mapping of local destination filename to object key. Each key
            is looked up under the module-level ``key_prefix`` in the
            module-level ``s3_bucket``; both are read when the function runs.
    """
    # One client for the whole batch instead of a new client per file.
    s3_client = boto3.client("s3")
    for filename, image_key in images.items():
        s3_client.download_file(s3_bucket, f"{key_prefix}/{image_key}", filename)

In [None]:
# Local filename -> object key (relative to key_prefix) of the sample images.
images = {"img1.jpg": "cat.jpg", "img2.jpg": "dog.jpg"}
images

In [None]:
download_from_s3(images)

In [None]:
def predict_top_k_labels(probabilities, labels, k):
    """Return the ``k`` most probable labels as one comma-separated string.

    Args:
        probabilities: Sequence of scores, indexed in parallel with ``labels``.
        labels: Sequence of label strings.
        k: Maximum number of labels to include.

    Returns:
        The labels joined with ", ", highest score first; equal scores keep
        their original relative order.
    """
    ranked_indices = list(range(len(probabilities)))
    # list.sort is stable, so ties stay in input order even with reverse=True.
    ranked_indices.sort(key=lambda position: probabilities[position], reverse=True)
    return ", ".join(labels[position] for position in ranked_indices[:k])

In [None]:
# Send each downloaded image to the base-model endpoint and render the image
# next to its five highest-probability labels.
for image_filename in images:
    with open(image_filename, "rb") as file:
        img = file.read()
    query_response = base_model_predictor.predict(
        img,
        {"ContentType": "application/x-image", "Accept": "application/json;verbose"},
    )
    model_predictions = json.loads(query_response)
    labels = model_predictions["labels"]
    probabilities = model_predictions["probabilities"]
    top5_class_labels = predict_top_k_labels(probabilities, labels, 5)
    image_html = f'<img src={image_filename} alt={image_filename} align="left" style="width: 250px;"/>'
    caption_html = f"<figcaption>Top-5 predictions: {top5_class_labels} </figcaption>"
    display(HTML(image_html + caption_html))

In [None]:
# Tear down the base-model deployment: delete the model, then the endpoint.
base_model_predictor.delete_model()

In [None]:
base_model_predictor.delete_endpoint()

In [None]:
# Re-read the dropdown so the fine-tuning cells below use the current selection.
model_id, model_version = dropdown.value, "*"

In [None]:
model_id

In [None]:
model_version

In [None]:
training_instance_type = "ml.p3.2xlarge"
training_instance_type

In [None]:
# Training-scope container image URI for the selected model and instance type.
train_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    model_id=model_id,
    model_version=model_version,
    image_scope="training",
    instance_type=training_instance_type)
train_image_uri

In [None]:
# Training-scope script URI.
train_source_uri = script_uris.retrieve(model_id=model_id, model_version=model_version, script_scope="training")
train_source_uri

In [None]:
# Training-scope model artifact URI (the starting point for fine-tuning).
train_model_uri = model_uris.retrieve(model_id=model_id, model_version=model_version, model_scope="training")
train_model_uri

In [None]:
# Training data location: bucket + prefix combined into an s3:// URI below.
training_data_bucket = f"jumpstart-cache-prod-{aws_region}"
training_data_bucket

In [None]:
training_data_prefix = "training-datasets/tf_flowers/"
training_data_prefix

In [None]:
training_dataset_s3_path = f"s3://{training_data_bucket}/{training_data_prefix}"
training_dataset_s3_path

In [None]:
# Training output goes to the session's default bucket under output_prefix.
output_bucket = sess.default_bucket()
output_bucket

In [None]:
output_prefix = "jumpstart-example-ic-training"
output_prefix

In [None]:
s3_output_location = f"s3://{output_bucket}/{output_prefix}/output"
s3_output_location

In [None]:
# Fetch the selected model's default training hyperparameters.
# The result is bound to the name `hyperparameters`, which replaces the
# imported `sagemaker.hyperparameters` module in this namespace. Calling
# through the `sagemaker` package (rather than the bare name) keeps this cell
# re-runnable after that rebinding.
hyperparameters = sagemaker.hyperparameters.retrieve_default(model_id=model_id, model_version=model_version)
hyperparameters

In [None]:
# Override the epoch count; the value is stored as a string.
hyperparameters["epochs"] = "5"

In [None]:
print(hyperparameters)

In [None]:
# Switch read by later cells: True runs a HyperparameterTuner job, False fits the estimator directly.
use_amt = True
use_amt

In [None]:
# Per-framework tuning objective, keyed by the prefix a model id starts with.
# Both frameworks report a metric named "val_accuracy" to be maximized; only
# the regex applied to the training log differs.
metric_definitions_per_model = {
    framework: {
        "metrics": [{"Name": "val_accuracy", "Regex": log_pattern}],
        "type": "Maximize",
    }
    for framework, log_pattern in (
        ("tensorflow", "val_accuracy: ([0-9\\.]+)"),
        ("pytorch", "val Acc: ([0-9\\.]+)"),
    )
}
metric_definitions_per_model

In [None]:
# Search space handed to the tuner: a single continuous, log-scaled range for "adam-learning-rate".
hyperparameter_ranges = {"adam-learning-rate": ContinuousParameter(0.0001, 0.1, scaling_type="Logarithmic")}
hyperparameter_ranges

In [None]:
# Passed to HyperparameterTuner as max_jobs.
max_jobs = 6
max_jobs

In [None]:
# Passed to HyperparameterTuner as max_parallel_jobs.
max_parallel_jobs = 2
max_parallel_jobs

In [None]:
# Base name used for both the estimator's training jobs and the tuning job.
training_job_name = name_from_base(f"jumpstart-example-{model_id}-transfer-learning")
training_job_name

In [None]:
# Estimator for fine-tuning: training image, script and base model artifact
# retrieved above, the (edited) default hyperparameters, output under s3_output_location.
ic_estimator = Estimator(
    role=aws_role,
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    model_uri=train_model_uri,
    entry_point="transfer_learning.py",
    instance_count=1,
    instance_type=training_instance_type,
    max_run=360000,
    hyperparameters=hyperparameters,
    output_path=s3_output_location,
    base_job_name=training_job_name)
ic_estimator

In [None]:
# Train the model: either through a hyperparameter tuning job (use_amt) or a
# single training job on the estimator.
if use_amt:
    # Pick the metric definitions whose framework key prefixes the model id.
    metric_definitions = next(
        (value for key, value in metric_definitions_per_model.items() if model_id.startswith(key)),
        None)
    if metric_definitions is None:
        # Fail with an actionable message instead of a bare StopIteration.
        raise ValueError(
            f"No tuning metric definitions for model_id {model_id!r}; "
            f"supported framework prefixes: {sorted(metric_definitions_per_model)}")
    hp_tuner = HyperparameterTuner(
        ic_estimator,
        metric_definitions["metrics"][0]["Name"],  # objective metric name
        hyperparameter_ranges,
        metric_definitions["metrics"],
        max_jobs=max_jobs,
        max_parallel_jobs=max_parallel_jobs,
        objective_type=metric_definitions["type"],
        base_tuning_job_name=training_job_name)
    hp_tuner.fit({"training": training_dataset_s3_path})
else:
    ic_estimator.fit({"training": training_dataset_s3_path}, logs=True)

In [None]:
inference_instance_type = "ml.m5.xlarge"
inference_instance_type

In [None]:
# Inference-scope container image URI for the fine-tuned model's endpoint.
deploy_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="inference",
    model_id=model_id,
    model_version=model_version,
    instance_type=inference_instance_type)
deploy_image_uri

In [None]:
# Inference-scope script URI.
deploy_source_uri = script_uris.retrieve(
    model_id=model_id, model_version=model_version, script_scope="inference")
deploy_source_uri

In [None]:
# Separate endpoint name for the fine-tuned ("FT") model.
endpoint_name = name_from_base(f"jumpstart-example-FT-{model_id}-")
endpoint_name

In [None]:
# Deploy from whichever object ran the training: the tuner when use_amt is set, otherwise the estimator.
finetuned_predictor = (hp_tuner if use_amt else ic_estimator).deploy(
    initial_instance_count=1,
    instance_type=inference_instance_type,
    entry_point="inference.py",
    image_uri=deploy_image_uri,
    source_dir=deploy_source_uri,
    endpoint_name=endpoint_name)
finetuned_predictor

In [None]:
# Rebind the bucket and key prefix that download_from_s3 reads at call time.
s3_bucket = f"jumpstart-cache-prod-{aws_region}"
s3_bucket

In [None]:
key_prefix = "training-datasets/tf_flowers"
key_prefix

In [None]:
def download_from_s3(images):
    """Download each S3 object named in ``images`` to a local file.

    Args:
        images: Mapping of local destination filename to object key. Each key
            is looked up under the module-level ``key_prefix`` in the
            module-level ``s3_bucket``; both are read when the function runs.
    """
    # One client for the whole batch instead of a new client per file.
    s3_client = boto3.client("s3")
    for filename, image_key in images.items():
        s3_client.download_file(s3_bucket, f"{key_prefix}/{image_key}", filename)

In [None]:
# Local filename -> object key (relative to key_prefix) of two images from the training dataset prefix.
flower_images = {"img1.jpg": "roses/10503217854_e66a804309.jpg",
                 "img2.jpg": "sunflowers/1008566138_6927679c8a.jpg"}
flower_images

In [None]:
download_from_s3(flower_images)

In [None]:
# Send each downloaded flower image to the fine-tuned endpoint and render the
# image next to the label the model predicted.
for image_filename in flower_images:
    with open(image_filename, "rb") as file:
        img = file.read()
    query_response = finetuned_predictor.predict(
        img,
        {"ContentType": "application/x-image", "Accept": "application/json;verbose"},
    )
    model_predictions = json.loads(query_response)
    predicted_label = model_predictions["predicted_label"]
    image_html = f'<img src={image_filename} alt={image_filename} align="left" style="width: 250px;"/>'
    caption_html = f"<figcaption>Predicted Label: {predicted_label}</figcaption>"
    display(HTML(image_html + caption_html))

In [None]:
# Tear down the fine-tuned deployment: delete the model, then the endpoint.
finetuned_predictor.delete_model()

In [None]:
finetuned_predictor.delete_endpoint()

In [None]:
# Find the training job whose output seeds incremental training: the tuning
# job's best training job when use_amt is set, otherwise the estimator's own job.
# NOTE(review): _current_job_name is a private SDK attribute — confirm it is
# stable across SDK versions or switch to a public accessor.
if use_amt:
    sage_client = boto3.Session().client("sagemaker")
    tuning_job_result = sage_client.describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=hp_tuner._current_job_name)
    last_training_job_name = tuning_job_result["BestTrainingJob"]["TrainingJobName"]
else:
    last_training_job_name = ic_estimator._current_job_name

In [None]:
# assumes the job wrote its artifact to <output_path>/<job name>/output/model.tar.gz — TODO confirm
last_trained_model_path = f"{s3_output_location}/{last_training_job_name}/output/model.tar.gz"
last_trained_model_path

In [None]:
incremental_train_output_prefix = "jumpstart-example-ic-incremental-training"
incremental_train_output_prefix

In [None]:
incremental_s3_output_location = f"s3://{output_bucket}/{incremental_train_output_prefix}/output"
incremental_s3_output_location

In [None]:
incremental_training_job_name = name_from_base(f"jumpstart-example-{model_id}-incremental-training")
incremental_training_job_name

In [None]:
# Same configuration as ic_estimator, except that model_uri points at the
# previously trained artifact and output goes to the incremental location.
incremental_train_estimator = Estimator(
    role=aws_role,
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    model_uri=last_trained_model_path,
    entry_point="transfer_learning.py",
    instance_count=1,
    instance_type=training_instance_type,
    max_run=360000,
    hyperparameters=hyperparameters,
    output_path=incremental_s3_output_location,
    base_job_name=incremental_training_job_name)
incremental_train_estimator

In [None]:
incremental_train_estimator.fit({"training": training_dataset_s3_path}, logs=True)

## Object Detection

In [None]:
!pip3 install sagemaker ipywidgets --upgrade --quiet

import sagemaker, boto3, json
import IPython
from IPython.core.display import HTML
import ipywidgets as widgets

from sagemaker import get_execution_role
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base
from sagemaker import get_execution_role, image_uris, model_uris, script_uris, hyperparameters
from sagemaker import hyperparameters
from sagemaker.tuner import ContinuousParameter
from sagemaker.estimator import Estimator
from sagemaker.utils import name_from_base
from sagemaker.tuner import HyperparameterTuner


In [None]:
# Resolve the execution role, the region of the default boto3 session, and a
# SageMaker session; later cells read all three.
aws_role = get_execution_role()
aws_region = boto3.Session().region_name
sess = sagemaker.Session()

In [None]:
# Default JumpStart model; "*" is used as the version value for this model.
model_id, model_version = "pytorch-ic-mobilenet-v2", "*"

In [None]:
# Download models_manifest.json from the region's jumpstart-cache-prod bucket into the working directory.
boto3.client("s3").download_file(f"jumpstart-cache-prod-{aws_region}", "models_manifest.json", "models_manifest.json")

In [None]:
# Parse the downloaded manifest; the parsed object is echoed as the cell output.
with open("models_manifest.json", "rb") as json_file:
    model_list = json.load(json_file)
model_list

In [None]:
# Every manifest entry whose model id contains "-ic-"; the same id can appear
# more than once (presumably one entry per published version — confirm against the manifest).
ic_models_all_versions = [model["model_id"] for model in model_list if "-ic-" in model["model_id"]]
# De-duplicate while keeping first-seen order (dict keys preserve insertion order).
ic_models = list(dict.fromkeys(ic_models_all_versions))

In [None]:
ic_models_all_versions

In [None]:
ic_models

In [None]:
# Dropdown over the de-duplicated model ids, pre-selecting model_id.
# NOTE(review): presumably model_id has to be one of `options` — confirm with ipywidgets docs.
dropdown = widgets.Dropdown(
    options=ic_models,
    value=model_id,
    description="JumpStart Image Classification Models:",
    style={"description_width": "initial"},
    layout={"width": "max-content"})
dropdown

In [None]:
display(IPython.display.Markdown("## Select a JumpStart pre-trained model from the dropdown below"))

In [None]:
display(dropdown)

In [None]:
# Take the current dropdown selection; "*" is passed as the version to the retrieve calls below.
infer_model_id, infer_model_version = dropdown.value, "*"

In [None]:
infer_model_id

In [None]:
infer_model_version

In [None]:
# Endpoint name derived from the selected model id via name_from_base.
endpoint_name = name_from_base(f"jumpstart-example-{infer_model_id}")
endpoint_name

In [None]:
inference_instance_type = "ml.m5.xlarge"
inference_instance_type

In [None]:
# Inference-scope container image URI for the selected model and instance type.
deploy_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="inference",
    model_id=infer_model_id,
    model_version=infer_model_version,
    instance_type=inference_instance_type)
deploy_image_uri

In [None]:
# Inference-scope script URI for the selected model.
deploy_source_uri = script_uris.retrieve(model_id=infer_model_id, model_version=infer_model_version, script_scope="inference")
deploy_source_uri

In [None]:
# Inference-scope model artifact URI for the selected model.
base_model_uri = model_uris.retrieve(model_id=infer_model_id, model_version=infer_model_version, model_scope="inference")
base_model_uri

In [None]:
# Deployable model assembled from the image, script and artifact URIs above;
# predictor_cls makes deploy() return a Predictor.
model = Model(
    image_uri=deploy_image_uri,
    source_dir=deploy_source_uri,
    model_data=base_model_uri,
    entry_point="inference.py",
    role=aws_role,
    predictor_cls=Predictor,
    name=endpoint_name)
model

In [None]:
# Deploy to an endpoint named endpoint_name with one instance of inference_instance_type.
base_model_predictor = model.deploy(
    initial_instance_count=1,
    instance_type=inference_instance_type,
    endpoint_name=endpoint_name)
base_model_predictor

In [None]:
# Bucket and key prefix read by download_from_s3 at call time.
s3_bucket = f"jumpstart-cache-prod-{aws_region}"
s3_bucket

In [None]:
key_prefix = "inference-notebook-assets"
key_prefix

In [None]:
def download_from_s3(images):
    """Download each S3 object named in ``images`` to a local file.

    Args:
        images: Mapping of local destination filename to object key. Each key
            is looked up under the module-level ``key_prefix`` in the
            module-level ``s3_bucket``; both are read when the function runs.
    """
    # One client for the whole batch instead of a new client per file.
    s3_client = boto3.client("s3")
    for filename, image_key in images.items():
        s3_client.download_file(s3_bucket, f"{key_prefix}/{image_key}", filename)

In [None]:
# Local filename -> object key (relative to key_prefix) of the sample images.
images = {"img1.jpg": "cat.jpg", "img2.jpg": "dog.jpg"}
images

In [None]:
download_from_s3(images)

In [None]:
def predict_top_k_labels(probabilities, labels, k):
    """Return the ``k`` most probable labels as one comma-separated string.

    Args:
        probabilities: Sequence of scores, indexed in parallel with ``labels``.
        labels: Sequence of label strings.
        k: Maximum number of labels to include.

    Returns:
        The labels joined with ", ", highest score first; equal scores keep
        their original relative order.
    """
    # Pair every position with its score, then order the pairs by score.
    # sorted() is stable, so ties stay in input order even with reverse=True.
    scored_positions = sorted(enumerate(probabilities), key=lambda pair: pair[1], reverse=True)
    best_labels = [labels[position] for position, _score in scored_positions[:k]]
    return ", ".join(best_labels)

In [None]:
# Send each downloaded image to the base-model endpoint and render the image
# next to its five highest-probability labels.
for image_filename in images:
    with open(image_filename, "rb") as file:
        img = file.read()
    query_response = base_model_predictor.predict(
        img,
        {"ContentType": "application/x-image", "Accept": "application/json;verbose"},
    )
    model_predictions = json.loads(query_response)
    labels = model_predictions["labels"]
    probabilities = model_predictions["probabilities"]
    top5_class_labels = predict_top_k_labels(probabilities, labels, 5)
    image_html = f'<img src={image_filename} alt={image_filename} align="left" style="width: 250px;"/>'
    caption_html = f"<figcaption>Top-5 predictions: {top5_class_labels} </figcaption>"
    display(HTML(image_html + caption_html))

In [None]:
# Tear down the base-model deployment: delete the model, then the endpoint.
base_model_predictor.delete_model()

In [None]:
base_model_predictor.delete_endpoint()

In [None]:
# Re-read the dropdown so the fine-tuning cells below use the current selection.
model_id, model_version = dropdown.value, "*"

In [None]:
model_id

In [None]:
model_version

In [None]:
training_instance_type = "ml.p3.2xlarge"
training_instance_type

In [None]:
# Training-scope container image URI for the selected model and instance type.
train_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    model_id=model_id,
    model_version=model_version,
    image_scope="training",
    instance_type=training_instance_type)
train_image_uri

In [None]:
# Training-scope script URI.
train_source_uri = script_uris.retrieve(model_id=model_id, model_version=model_version, script_scope="training")
train_source_uri

In [None]:
# Training-scope model artifact URI (the starting point for fine-tuning).
train_model_uri = model_uris.retrieve(model_id=model_id, model_version=model_version, model_scope="training")
train_model_uri

In [None]:

# Training data location: bucket + prefix combined into an s3:// URI below.
training_data_bucket = f"jumpstart-cache-prod-{aws_region}"


In [None]:
training_data_prefix = "training-datasets/tf_flowers/"
training_data_prefix

In [None]:
training_dataset_s3_path = f"s3://{training_data_bucket}/{training_data_prefix}"
training_dataset_s3_path

In [None]:
# Training output goes to the session's default bucket under output_prefix.
output_bucket = sess.default_bucket()
output_bucket

In [None]:
output_prefix = "jumpstart-example-ic-training"
output_prefix

In [None]:
s3_output_location = f"s3://{output_bucket}/{output_prefix}/output"
s3_output_location

In [None]:
# Fetch the selected model's default training hyperparameters.
# The result is bound to the name `hyperparameters`, which replaces the
# imported `sagemaker.hyperparameters` module in this namespace. Calling
# through the `sagemaker` package (rather than the bare name) keeps this cell
# re-runnable after that rebinding.
hyperparameters = sagemaker.hyperparameters.retrieve_default(model_id=model_id, model_version=model_version)
hyperparameters

In [None]:
# Override the epoch count; the value is stored as a string.
hyperparameters["epochs"] = "5"

In [None]:
print(hyperparameters)

In [None]:
# Switch read by later cells: True runs a HyperparameterTuner job, False fits the estimator directly.
use_amt = True
use_amt

In [None]:
# Per-framework tuning objective, keyed by the prefix a model id starts with.
# Each entry names the metric, the regex applied to the training log, and the
# optimization direction.
metric_definitions_per_model = dict(
    tensorflow=dict(
        metrics=[dict(Name="val_accuracy", Regex="val_accuracy: ([0-9\\.]+)")],
        type="Maximize",
    ),
    pytorch=dict(
        metrics=[dict(Name="val_accuracy", Regex="val Acc: ([0-9\\.]+)")],
        type="Maximize",
    ),
)
metric_definitions_per_model

In [None]:
# Search space handed to the tuner: a single continuous, log-scaled range for "adam-learning-rate".
hyperparameter_ranges = {"adam-learning-rate": ContinuousParameter(0.0001, 0.1, scaling_type="Logarithmic")}
hyperparameter_ranges

In [None]:
# Passed to HyperparameterTuner as max_jobs.
max_jobs = 6
max_jobs

In [None]:
# Passed to HyperparameterTuner as max_parallel_jobs.
max_parallel_jobs = 2
max_parallel_jobs

In [None]:
# Base name used for both the estimator's training jobs and the tuning job.
training_job_name = name_from_base(f"jumpstart-example-{model_id}-transfer-learning")
training_job_name

In [None]:
# Estimator for fine-tuning: training image, script and base model artifact
# retrieved above, the (edited) default hyperparameters, output under s3_output_location.
ic_estimator = Estimator(
    role=aws_role,
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    model_uri=train_model_uri,
    entry_point="transfer_learning.py",
    instance_count=1,
    instance_type=training_instance_type,
    max_run=360000,
    hyperparameters=hyperparameters,
    output_path=s3_output_location,
    base_job_name=training_job_name)
ic_estimator

In [None]:
# Train the model: either through a hyperparameter tuning job (use_amt) or a
# single training job on the estimator.
if use_amt:
    # Pick the metric definitions whose framework key prefixes the model id.
    metric_definitions = next(
        (value for key, value in metric_definitions_per_model.items() if model_id.startswith(key)),
        None)
    if metric_definitions is None:
        # Fail with an actionable message instead of a bare StopIteration.
        raise ValueError(
            f"No tuning metric definitions for model_id {model_id!r}; "
            f"supported framework prefixes: {sorted(metric_definitions_per_model)}")
    hp_tuner = HyperparameterTuner(
        ic_estimator,
        metric_definitions["metrics"][0]["Name"],  # objective metric name
        hyperparameter_ranges,
        metric_definitions["metrics"],
        max_jobs=max_jobs,
        max_parallel_jobs=max_parallel_jobs,
        objective_type=metric_definitions["type"],
        base_tuning_job_name=training_job_name)
    hp_tuner.fit({"training": training_dataset_s3_path})
else:
    ic_estimator.fit({"training": training_dataset_s3_path}, logs=True)

In [None]:
inference_instance_type = "ml.m5.xlarge"
inference_instance_type

In [None]:
# Inference-scope container image URI for the fine-tuned model's endpoint.
deploy_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="inference",
    model_id=model_id,
    model_version=model_version,
    instance_type=inference_instance_type)
deploy_image_uri

In [None]:
# Inference-scope script URI.
deploy_source_uri = script_uris.retrieve(
    model_id=model_id, model_version=model_version, script_scope="inference")
deploy_source_uri

In [None]:
# Separate endpoint name for the fine-tuned ("FT") model.
endpoint_name = name_from_base(f"jumpstart-example-FT-{model_id}-")
endpoint_name

In [None]:
# Deploy from whichever object ran the training: the tuner when use_amt is set, otherwise the estimator.
finetuned_predictor = (hp_tuner if use_amt else ic_estimator).deploy(
    initial_instance_count=1,
    instance_type=inference_instance_type,
    entry_point="inference.py",
    image_uri=deploy_image_uri,
    source_dir=deploy_source_uri,
    endpoint_name=endpoint_name)
finetuned_predictor

In [None]:
# Rebind the bucket and key prefix that download_from_s3 reads at call time.
s3_bucket = f"jumpstart-cache-prod-{aws_region}"
s3_bucket

In [None]:
key_prefix = "training-datasets/tf_flowers"
key_prefix

In [None]:
def download_from_s3(images):
    """Download each S3 object named in ``images`` to a local file.

    Args:
        images: Mapping of local destination filename to object key. Each key
            is looked up under the module-level ``key_prefix`` in the
            module-level ``s3_bucket``; both are read when the function runs.
    """
    # One client for the whole batch instead of a new client per file.
    s3_client = boto3.client("s3")
    for filename, image_key in images.items():
        s3_client.download_file(s3_bucket, f"{key_prefix}/{image_key}", filename)

In [None]:
# Local filename -> object key (relative to key_prefix) of two images from the training dataset prefix.
flower_images = {"img1.jpg": "roses/10503217854_e66a804309.jpg",
                 "img2.jpg": "sunflowers/1008566138_6927679c8a.jpg"}
flower_images

In [None]:
download_from_s3(flower_images)

In [None]:
# Send each downloaded flower image to the fine-tuned endpoint and render the
# image next to the label the model predicted.
for image_filename in flower_images:
    with open(image_filename, "rb") as file:
        img = file.read()
    query_response = finetuned_predictor.predict(
        img,
        {"ContentType": "application/x-image", "Accept": "application/json;verbose"},
    )
    model_predictions = json.loads(query_response)
    predicted_label = model_predictions["predicted_label"]
    image_html = f'<img src={image_filename} alt={image_filename} align="left" style="width: 250px;"/>'
    caption_html = f"<figcaption>Predicted Label: {predicted_label}</figcaption>"
    display(HTML(image_html + caption_html))

In [None]:
# Tear down the fine-tuned deployment: delete the model, then the endpoint.
finetuned_predictor.delete_model()

In [None]:
finetuned_predictor.delete_endpoint()

In [None]:
# Find the training job whose output seeds incremental training: the tuning
# job's best training job when use_amt is set, otherwise the estimator's own job.
# NOTE(review): _current_job_name is a private SDK attribute — confirm it is
# stable across SDK versions or switch to a public accessor.
if use_amt:
    sage_client = boto3.Session().client("sagemaker")
    tuning_job_result = sage_client.describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=hp_tuner._current_job_name)
    last_training_job_name = tuning_job_result["BestTrainingJob"]["TrainingJobName"]
else:
    last_training_job_name = ic_estimator._current_job_name

In [None]:
# assumes the job wrote its artifact to <output_path>/<job name>/output/model.tar.gz — TODO confirm
last_trained_model_path = f"{s3_output_location}/{last_training_job_name}/output/model.tar.gz"
last_trained_model_path

In [None]:
incremental_train_output_prefix = "jumpstart-example-ic-incremental-training"
incremental_train_output_prefix

In [None]:
incremental_s3_output_location = f"s3://{output_bucket}/{incremental_train_output_prefix}/output"

incremental_training_job_name = name_from_base(f"jumpstart-example-{model_id}-incremental-training")

# Same configuration as ic_estimator, except that model_uri points at the
# previously trained artifact and output goes to the incremental location.
incremental_train_estimator = Estimator(
    role=aws_role,
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    model_uri=last_trained_model_path,
    entry_point="transfer_learning.py",
    instance_count=1,
    instance_type=training_instance_type,
    max_run=360000,
    hyperparameters=hyperparameters,
    output_path=incremental_s3_output_location,
    base_job_name=incremental_training_job_name,
)

incremental_train_estimator.fit({"training": training_dataset_s3_path}, logs=True)