In [4]:
import sagemaker
from sagemaker.pytorch import PyTorchModel
from sagemaker import get_execution_role

In [7]:
# IAM execution role that SageMaker assumes on behalf of this notebook.
role = get_execution_role()

# Session object wrapping the AWS/SageMaker API clients used by the SDK.
sagemaker_session = sagemaker.Session()

## Upload pre-trained model to S3 (only needs to be run once)

In [98]:
# Load pretrained ResNet model and save locally

import torch
from torchvision.models import resnet50, ResNet50_Weights

# Local checkpoint file that will later be packed into model.tar.gz.
local_path = "./model.ckpt"

# Build ResNet-50 with torchvision's default pretrained weights and persist
# only its state_dict (the weights, not the pickled module) to local_path.
model = resnet50(weights=ResNet50_Weights.DEFAULT)
torch.save(model.state_dict(), local_path)

In [99]:
# Tar the model checkpoint and upload to S3 bucket
# NOTE(review): S3 bucket names are global; if "resnet50-model" is already
# taken by another account, pick a different name here and in the
# model_artifact_s3_location cell below.

# Pack the checkpoint into a gzip-compressed tarball (verbose output).
!tar -czvf model.tar.gz ./model.ckpt
# Create the destination bucket.
!aws s3 mb s3://resnet50-model
# Upload the tarball to the bucket root.
!aws s3 cp ./model.tar.gz s3://resnet50-model/ 
# List the bucket contents to confirm the upload.
!aws s3 ls s3://resnet50-model

## Run Batch Inference with SageMaker APIs

In [2]:
# S3 URI of the packaged model artifact, passed as `model_data` to PyTorchModel.
# Must be a tar.gz file (the archive uploaded in the section above).
model_artifact_s3_location = "s3://resnet50-model/model.tar.gz"

In [8]:
# Create a SageMaker model from the uploaded artifact.
# `entry_point` is resolved inside `source_dir`, i.e. ./code/predict.py;
# predict.py defines the logic for loading the model and handling
# prediction requests.

pytorch_model = PyTorchModel(
    # What to serve and under which IAM role.
    model_data=model_artifact_s3_location,
    role=role,
    # Inference code shipped alongside the model.
    source_dir="./code",
    entry_point="predict.py",
    # Serving container: PyTorch 1.12.1 on Python 3.8.
    framework_version="1.12.1",
    py_version="py38",
)

In [9]:
# Build a batch Transformer from the PyTorchModel object.
# The job runs on 4 x ml.g4dn.xlarge instances, uses the "MultiRecord"
# batch strategy, and caps each request payload at 100 (MB).
transformer = pytorch_model.transformer(
    instance_count=4,
    instance_type="ml.g4dn.xlarge",
    strategy="MultiRecord",
    max_payload=100,
)

In [None]:
import time


# Run batch inference over the raw images under the S3 prefix.
# wait=True blocks until the transform job finishes, so the wall-clock
# difference below covers the whole job; logs=False asks the SDK not to
# stream the container logs while waiting.
start_time = time.time()
transformer.transform(
    data="s3://air-example-data-2/10G-image-data-synthetic-raw/",
    data_type="S3Prefix",
    content_type="application/x-image",
    wait=True,
    logs=False,
)
end_time = time.time()

# Report the measurement here: a later cell reassigns start_time/end_time,
# so without this the timing of this run would be lost.
print(f"Batch transform (raw images) took {end_time - start_time:.1f} seconds")

[34m2023-01-31T03:24:37,487 [INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager - Initializing plugins manager...[0m
[34m2023-01-31T03:24:37,598 [INFO ] main org.pytorch.serve.ModelServer - [0m
[34mTorchserve version: 0.6.0[0m
[34mTS Home: /opt/conda/lib/python3.8/site-packages[0m
[34mCurrent directory: /[0m
[34mTemp directory: /home/model-server/tmp[0m
[34mNumber of GPUs: 1[0m
[34mNumber of CPUs: 4[0m
[34mMax heap size: 2980 M[0m
[34mPython executable: /opt/conda/bin/python3.8[0m
[34mConfig file: /etc/sagemaker-ts.properties[0m
[34mInference address: http://0.0.0.0:8080[0m
[34mManagement address: http://0.0.0.0:8080[0m
[34mMetrics address: http://127.0.0.1:8082[0m
[34mModel Store: /.sagemaker/ts/models[0m
[34mInitial Models: model=/opt/ml/model[0m
[34mLog dir: /logs[0m
[34mMetrics dir: /logs[0m
[34mNetty threads: 0[0m
[34mNetty client threads: 0[0m
[34mDefault workers per model: 1[0m
[34mBlacklist Regex: N/A[0m
[34mMaximum Respons

In [None]:
# Run batch inference with Parquet files...this fails.
# NOTE(review): the cause of the failure is not recorded in this notebook;
# presumably the serving code in predict.py does not accept
# "application/x-parquet" input - confirm before relying on this cell.
import time


start_time = time.time()
try:
    # wait=True blocks until the job ends; a failed job is expected to
    # surface as an exception raised from this call.
    transformer.transform(
        data="s3://air-example-data-2/10G-image-data-synthetic-raw-parquet-120-partition/",
        data_type="S3Prefix",
        content_type="application/x-parquet",
        wait=True,
        logs=False,
    )
finally:
    # Record and report the elapsed time even when the job fails, so the
    # time-to-failure is not lost when the exception propagates.
    end_time = time.time()
    print(f"Batch transform (Parquet) ran for {end_time - start_time:.1f} seconds")