## You can download a pretrained NVIDIA model from NGC (NVIDIA GPU Cloud)

#### ... and then compress it as a tar.gz file, and use this for deployment:

In [1]:
# Fetch the pretrained SSD checkpoint from NGC. -nc (--no-clobber) skips the
# download when the file is already present, so re-running this cell does not
# leave numbered duplicates such as nvidia_ssdpyt_fp32_190826.pt.1 behind.
!wget -nc https://api.ngc.nvidia.com/v2/models/nvidia/ssdpyt_fp32/versions/2/files/nvidia_ssdpyt_fp32_190826.pt

--2020-03-18 19:37:01--  https://api.ngc.nvidia.com/v2/models/nvidia/ssdpyt_fp32/versions/2/files/nvidia_ssdpyt_fp32_190826.pt
Resolving api.ngc.nvidia.com (api.ngc.nvidia.com)... 52.35.21.100, 35.161.41.62
Connecting to api.ngc.nvidia.com (api.ngc.nvidia.com)|52.35.21.100|:443... connected.
HTTP request sent, awaiting response... 302 
Location: https://s3.us-west-2.amazonaws.com/prod-model-registry-ngc-bucket/org/nvidia/models/ssdpyt_fp32/versions/2/files/nvidia_ssdpyt_fp32_190826.pt?response-content-disposition=attachment%3B%20filename%3D%22nvidia_ssdpyt_fp32_190826.pt%22&response-content-type=application%2Foctet-stream&X-Amz-Security-Token=IQoJb3JpZ2luX2VjENv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLXdlc3QtMiJIMEYCIQDQgkQcQ9pDRllmFXUkmQyrW6dohDEsW0WJJLAgyaxO%2BgIhAPfQV9WZeKSAzXRZwTT3q3ZjKW5GGQ%2Bk0LSSk70QiEkEKr0DCMT%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEQAhoMNzg5MzYzMTM1MDI3IgxSXrpcKj5tL8heQiUqkQONTLPWf%2Bhetcq1bgG5mZ0jBrR4Hz3uN03%2BA8d8zT%2Fy9tgCLha9kXrTx1hhgqE5ZyiaoO7e9T62JWbkPCQbzq0EZLES%2F

In [2]:
import tarfile

# Wrap the downloaded checkpoint in a gzip-compressed tarball named
# model.tar.gz; that archive is what gets uploaded for deployment.
with tarfile.open(name='model.tar.gz', mode='w:gz') as archive:
    archive.add(name='nvidia_ssdpyt_fp32_190826.pt')

In [4]:
import sagemaker
# Session object whose default_bucket() and upload_data() methods are called
# by the following cells.
sess = sagemaker.Session()

In [5]:
# Bucket reported by the session as its default; used as the upload target.
bucket = sess.default_bucket()

In [7]:
# Upload the archive under the given key prefix. The returned location is
# stored in `modelpath` and later passed to PyTorchModel as model_data.
modelpath = sess.upload_data(
    path='model.tar.gz',
    bucket=bucket,
    key_prefix='sagemaker-pytorch/input',
)

In [8]:
# Display the location returned by upload_data.
modelpath

's3://sagemaker-us-east-1-497456752804/sagemaker-pytorch/input/model.tar.gz'

In [9]:
from sagemaker import get_execution_role
# Value returned by get_execution_role(); passed as `role=` when the
# PyTorchModel is constructed.
role = get_execution_role()

In [10]:
%%writefile transform_script.py

import torch
import os

def model_fn(model_dir):
    """Load the checkpoint file shipped inside the model archive.

    Args:
        model_dir: Directory containing ``nvidia_ssdpyt_fp32_190826.pt``.

    Returns:
        The object that ``torch.load`` deserializes from that file.
    """
    checkpoint_path = os.path.join(model_dir, 'nvidia_ssdpyt_fp32_190826.pt')
    # On a host without CUDA, remap tensors that were saved on a GPU onto the
    # CPU so loading does not fail; with CUDA available keep torch.load's
    # default placement.
    map_location = None if torch.cuda.is_available() else 'cpu'
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # obtained from a trusted source.
    model = torch.load(checkpoint_path, map_location=map_location)
    return model

Overwriting transform_script.py


In [12]:
from sagemaker.pytorch.model import PyTorchModel

In [13]:
# Describe the model: the uploaded archive, the execution role, and the
# entry-point script that defines model_fn.
pytorch_model = PyTorchModel(
    model_data=modelpath,
    role=role,
    entry_point='transform_script.py',
    framework_version='1.4.0',
)

# NOTE(review): wait=False presumably makes deploy() return before the
# endpoint is ready — confirm before calling predict on this predictor.
predictor = pytorch_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    wait=False,
)

## Alternatively, you can load the model from Torch Hub with `torch.hub.load`

In [14]:
import torch

precision = 'fp32'
# Load the `nvidia_ssd` entry point from the `torchhub` branch of
# NVIDIA/DeepLearningExamples, forwarding the precision as model_math.
ssd_model = torch.hub.load(
    'NVIDIA/DeepLearningExamples:torchhub',
    'nvidia_ssd',
    model_math=precision,
)

Using cache found in /home/ec2-user/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub


In [15]:
# List the working directory to see the files present at this point.
!ls

model.tar.gz			tmp.tar.gz
nvidia_ssdpyt_fp32_190826.pt	transform_script_hub.py
nvidia_ssdpyt_fp32_190826.pt.1	transform_script.py
Pytorch BYOM from NGC.ipynb	u.item
tmp


## Or better, download the model from Torch Hub on the fly inside `model_fn`

In [1]:
%%writefile transform_script_hub.py

import torch
import os

def model_fn(model_dir):
    """Return the `nvidia_ssd` model obtained through torch.hub.

    `model_dir` is accepted but not used: nothing is read from the model
    archive, the model comes from the hub call instead.

    NOTE(review): `map_location='cpu'` is handed to torch.hub.load as an
    extra keyword argument — confirm the hub entry point honours it.
    """
    return torch.hub.load(
        'NVIDIA/DeepLearningExamples:torchhub',
        'nvidia_ssd',
        model_math='fp32',
        map_location='cpu',
    )

Overwriting transform_script_hub.py


In [2]:
# PyTorchModel requires a non-empty model_data file, so a tiny placeholder
# archive is built here; transform_script_hub.py obtains the real model
# through torch.hub inside model_fn.
from sagemaker.pytorch.model import PyTorchModel
from sagemaker import get_execution_role
role = get_execution_role()

# Create a one-line placeholder file and pack it into tmp.tar.gz.
!echo "tmp content" > tmp
!tar -zcvf ./tmp.tar.gz tmp
# model_data points at the local placeholder archive via a file:// URI.
pytorch_model = PyTorchModel(model_data = 'file://tmp.tar.gz',
                             role=role,
                             entry_point='./transform_script_hub.py',
                             framework_version='1.4.0')

# NOTE(review): the endpoint name ends in '-cpu' while instance_type is
# 'local_gpu' — confirm which of the two is intended.
predictor = pytorch_model.deploy(instance_type='local_gpu', initial_instance_count=1, endpoint_name='nvidia-ssd-pytorch-cpu')


tmp
Attaching to tmp1ay9urvt_algo-1-9rmje_1
[36malgo-1-9rmje_1  |[0m 2020-04-03 20:25:57,205 [INFO ] main com.amazonaws.ml.mms.ModelServer - 
[36malgo-1-9rmje_1  |[0m MMS Home: /opt/conda/lib/python3.6/site-packages
[36malgo-1-9rmje_1  |[0m Current directory: /
[36malgo-1-9rmje_1  |[0m Temp directory: /home/model-server/tmp
[36malgo-1-9rmje_1  |[0m Number of GPUs: 4
[36malgo-1-9rmje_1  |[0m Number of CPUs: 32
[36malgo-1-9rmje_1  |[0m Max heap size: 27305 M
[36malgo-1-9rmje_1  |[0m Python executable: /opt/conda/bin/python
[36malgo-1-9rmje_1  |[0m Config file: /etc/sagemaker-mms.properties
[36malgo-1-9rmje_1  |[0m Inference address: http://0.0.0.0:8080
[36malgo-1-9rmje_1  |[0m Management address: http://0.0.0.0:8080
[36malgo-1-9rmje_1  |[0m Model Store: /.sagemaker/mms/models
[36malgo-1-9rmje_1  |[0m Initial Models: ALL
[36malgo-1-9rmje_1  |[0m Log dir: /logs
[36malgo-1-9rmje_1  |[0m Metrics dir: /logs
[36malgo-1-9rmje_1  |[0m Netty threads: 0
[36malgo-1-

In [8]:
import numpy as np
import urllib
import pickle 

# Fetch a URL with urllib and expose the downloaded bytes as a NumPy array.
def url_to_image(url):
    """Download `url` and return its raw bytes as a 1-D uint8 array.

    The payload is NOT decoded: for a JPEG the array holds the encoded file
    bytes, not pixel values.

    Args:
        url: Any URL that ``urllib.request.urlopen`` can open.

    Returns:
        numpy.ndarray of dtype uint8 with one element per downloaded byte.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
    """
    # A bare `import urllib` does not guarantee the `request` submodule is
    # loaded, so import it explicitly where it is used.
    import urllib.request

    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(url) as resp:
        payload = resp.read()
    image = np.asarray(bytearray(payload), dtype="uint8")
    return image

In [11]:
# Try the helper on a sample image; the result is a uint8 array holding the
# bytes of the downloaded file.
url_to_image('https://upload.wikimedia.org/wikipedia/commons/2/25/Postmen_Office_Room.jpg')

array([255, 216, 255, ..., 127, 255, 217], dtype=uint8)

In [12]:
from sagemaker.predictor import numpy_deserializer, npy_serializer

# Use npy_serializer for outgoing requests and numpy_deserializer for the
# responses returned by the endpoint.
predictor.serializer = npy_serializer
predictor.deserializer = numpy_deserializer

In [13]:
# NOTE(review): the recorded output for this cell shows the endpoint
# answering with HTTP 500 and the client then failing to deserialize the
# response. url_to_image returns the undecoded file bytes; presumably the
# model expects a decoded, preprocessed image tensor — confirm and add the
# missing preprocessing.
predictor.predict(url_to_image('https://upload.wikimedia.org/wikipedia/commons/2/25/Postmen_Office_Room.jpg'))

[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:56,003 [WARN ] W-9000-model-stderr com.amazonaws.ml.mms.wlm.WorkerLifeCycle - terminate called after throwing an instance of 'c10::Error'
[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:56,003 [WARN ] W-9000-model-stderr com.amazonaws.ml.mms.wlm.WorkerLifeCycle -   what():  owning_ptr == NullType::singleton() || owning_ptr->refcount_.load() > 0 INTERNAL ASSERT FAILED at /opt/conda/conda-bld/pytorch_1579022034529/work/c10/util/intrusive_ptr.h:348, please report a bug to PyTorch. intrusive_ptr: Can only intrusive_ptr::reclaim() owning pointers that were created using intrusive_ptr::release(). (reclaim at /opt/conda/conda-bld/pytorch_1579022034529/work/c10/util/intrusive_ptr.h:348)
[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:56,004 [WARN ] W-9000-model-stderr com.amazonaws.ml.mms.wlm.WorkerLifeCycle - frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x47 (0x7fcc6797a627 in /opt/conda/lib/python3.6/site-packages/torch/lib/libc

[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:56,243 [INFO ] epollEventLoopGroup-4-2 com.amazonaws.ml.mms.wlm.WorkerThread - 9000 Worker disconnected. WORKER_MODEL_LOADED
[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:56,244 [INFO ] W-9000-model ACCESS_LOG - /172.18.0.1:34954 "POST /invocations HTTP/1.1" 500 3975
[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:56,244 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Retry worker: 9000 in 1 seconds.


OSError: Failed to interpret file <_io.BytesIO object at 0x7ff6f0c14678> as a pickle

[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:57,348 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Listening on port: /home/model-server/tmp/.mms.sock.9000
[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:57,350 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - [PID]193
[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:57,350 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - MXNet worker started.
[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:57,350 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /home/model-server/tmp/.mms.sock.9000
[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:57,350 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Python runtime: 3.6.6
[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:57,352 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /home/model-server/tmp/.mms.sock.9000.
[36malgo-1-9rmje_1  |[0m 2020-04-03 20:45:57,915 [INF