In [None]:
import numpy as np
import time
import json
import requests
import boto3
import os
import sagemaker


In [None]:
from sagemaker import get_execution_role
from sagemaker.session import Session

role = get_execution_role()
sess = Session()
region = sess.boto_region_name
bucket = sess.default_bucket()

import torch
print(torch.__version__)


In [None]:
import sys
print(sys.version)


In [None]:
model_archive = 'yolov4.tar.gz'
!wget https://aws-ml-blog-artifacts.s3.us-east-2.amazonaws.com/yolov4.tar.gz


In [117]:
from sagemaker.utils import name_from_base
compilation_job_name = name_from_base('torchvision-yolov4-neo-1')
prefix = compilation_job_name+'/model'
model_path = sess.upload_data(path=model_archive, key_prefix=prefix)
compiled_model_path = 's3://{}/{}/output'.format(bucket, compilation_job_name)


In [130]:
framework_version = '1.6'
py_version = 'py3'
instance_type = 'ml.c5.9xlarge'
from sagemaker.pytorch.model import PyTorchModel
from sagemaker.predictor import Predictor

sm_model = PyTorchModel(model_data=model_path,
                               framework_version=framework_version,
                               role=role,
                               sagemaker_session=sess,
                               entry_point='inference_vanilla.py',
                               py_version=py_version,
                               env={"COMPILEDMODEL": 'False', 'MMS_MAX_RESPONSE_SIZE': '100000000', 'MMS_DEFAULT_RESPONSE_TIMEOUT': '500'}
                              )
uncompiled_predictor = sm_model.deploy(initial_instance_count=1, instance_type=instance_type)


-----!

In [109]:
input_layer_name = 'input0'
input_shape = [1,3,416,416]
data_shape = json.dumps({input_layer_name: input_shape})
target_device = 'ml_c5'
framework = 'PYTORCH'
from sagemaker.predictor import Predictor
compiled_env = {"MMS_DEFAULT_WORKERS_PER_MODEL":'1', "TVM_NUM_THREADS": '36', "COMPILEDMODEL": 'True', 'MMS_MAX_RESPONSE_SIZE': '100000000', 'MMS_DEFAULT_RESPONSE_TIMEOUT': '500'}
sm_model_compiled = PyTorchModel(model_data=model_path,
                               framework_version = framework_version,
                               role=role,
                               sagemaker_session=sess,
                               entry_point='inference.py',
                               predictor_cls=Predictor,
                               py_version=py_version,
                               env=compiled_env
                              )
compiled_model = sm_model_compiled.compile(target_instance_family=target_device, 
                                         input_shape=data_shape,
                                         job_name=compilation_job_name,
                                         role=role,
                                         framework=framework.lower(),
                                         framework_version=framework_version,
                                         output_path=compiled_model_path
                                        )


????????????????????????????????.....................................................!

In [110]:

optimized_predictor = compiled_model.deploy(initial_instance_count = 1,
                                  instance_type = instance_type 
#                                   serializer=sagemaker.serializers.IdentitySerializer(), 
#                                   deserializer=sagemaker.deserializers.BytesDeserializer()
                                 )


-----------!

In [132]:
iters = 1000
warmup = 100
client = boto3.client('sagemaker-runtime', region_name=region)

content_type = 'application/x-image'

sample_img_url = "https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg"
body = requests.get(sample_img_url).content
   
compiled_perf = []
uncompiled_perf = []


for i in range(0, iters):
    print(i)
    t0 = time.time()
    response = client.invoke_endpoint(EndpointName=optimized_predictor.endpoint_name, Body=body, ContentType=content_type)
    t1 = time.time()
    #convert to millis
    compiled_elapsed = (t1-t0)*1000

    t0 = time.time()
    response = client.invoke_endpoint(EndpointName=uncompiled_predictor.endpoint_name, Body=body, ContentType=content_type)
    t1 = time.time()
    #convert to millis
    uncompiled_elapsed = (t1-t0)*1000
    

    if warmup == 0:
        compiled_perf.append(compiled_elapsed)
        uncompiled_perf.append(uncompiled_elapsed)
    else:
        print(f'warmup ({i}, {iters}) : c - {compiled_elapsed} ms . uc - {uncompiled_elapsed} ms')
# # #         warmup = warmup – 1


0
warmup (0, 1000) : c - 168.3361530303955 ms . uc - 456.6977024078369 ms
1
warmup (1, 1000) : c - 191.4522647857666 ms . uc - 445.74666023254395 ms
2
warmup (2, 1000) : c - 180.45330047607422 ms . uc - 438.8444423675537 ms
3
warmup (3, 1000) : c - 199.66959953308105 ms . uc - 431.016206741333 ms
4
warmup (4, 1000) : c - 181.46300315856934 ms . uc - 465.5890464782715 ms
5
warmup (5, 1000) : c - 279.4523239135742 ms . uc - 462.62359619140625 ms
6
warmup (6, 1000) : c - 175.706148147583 ms . uc - 419.4176197052002 ms
7
warmup (7, 1000) : c - 180.26447296142578 ms . uc - 435.73737144470215 ms
8
warmup (8, 1000) : c - 173.24376106262207 ms . uc - 443.4037208557129 ms
9
warmup (9, 1000) : c - 198.9281177520752 ms . uc - 416.61906242370605 ms
10
warmup (10, 1000) : c - 186.29980087280273 ms . uc - 468.26791763305664 ms
11
warmup (11, 1000) : c - 167.86503791809082 ms . uc - 456.287145614624 ms
12
warmup (12, 1000) : c - 182.07859992980957 ms . uc - 439.64266777038574 ms
13
warmup (13, 1000) 

warmup (107, 1000) : c - 193.73321533203125 ms . uc - 406.39591217041016 ms
108
warmup (108, 1000) : c - 142.31085777282715 ms . uc - 368.1175708770752 ms
109
warmup (109, 1000) : c - 152.6942253112793 ms . uc - 394.55413818359375 ms
110
warmup (110, 1000) : c - 142.3661708831787 ms . uc - 394.00386810302734 ms
111
warmup (111, 1000) : c - 174.38220977783203 ms . uc - 410.5224609375 ms
112
warmup (112, 1000) : c - 142.0290470123291 ms . uc - 426.5439510345459 ms
113
warmup (113, 1000) : c - 144.35720443725586 ms . uc - 426.76711082458496 ms
114
warmup (114, 1000) : c - 183.86411666870117 ms . uc - 402.850866317749 ms
115
warmup (115, 1000) : c - 182.4629306793213 ms . uc - 373.396635055542 ms
116
warmup (116, 1000) : c - 221.8642234802246 ms . uc - 421.8711853027344 ms
117
warmup (117, 1000) : c - 154.93369102478027 ms . uc - 363.7275695800781 ms
118
warmup (118, 1000) : c - 144.32120323181152 ms . uc - 364.37249183654785 ms
119
warmup (119, 1000) : c - 156.42046928405762 ms . uc - 404

warmup (211, 1000) : c - 150.68387985229492 ms . uc - 405.35497665405273 ms
212
warmup (212, 1000) : c - 145.216703414917 ms . uc - 403.9120674133301 ms
213
warmup (213, 1000) : c - 157.13095664978027 ms . uc - 420.6051826477051 ms
214
warmup (214, 1000) : c - 155.059814453125 ms . uc - 370.7318305969238 ms
215
warmup (215, 1000) : c - 153.40733528137207 ms . uc - 364.0305995941162 ms
216
warmup (216, 1000) : c - 142.62700080871582 ms . uc - 425.04286766052246 ms
217
warmup (217, 1000) : c - 154.85477447509766 ms . uc - 443.2358741760254 ms
218
warmup (218, 1000) : c - 149.82199668884277 ms . uc - 406.6004753112793 ms
219
warmup (219, 1000) : c - 148.27346801757812 ms . uc - 410.66622734069824 ms
220
warmup (220, 1000) : c - 151.85093879699707 ms . uc - 412.2447967529297 ms
221
warmup (221, 1000) : c - 234.92813110351562 ms . uc - 425.295352935791 ms
222
warmup (222, 1000) : c - 145.79367637634277 ms . uc - 384.9821090698242 ms
223
warmup (223, 1000) : c - 150.9239673614502 ms . uc - 3

warmup (315, 1000) : c - 161.47422790527344 ms . uc - 433.8216781616211 ms
316
warmup (316, 1000) : c - 230.79752922058105 ms . uc - 362.9481792449951 ms
317
warmup (317, 1000) : c - 165.9865379333496 ms . uc - 392.59910583496094 ms
318
warmup (318, 1000) : c - 155.06410598754883 ms . uc - 376.603364944458 ms
319
warmup (319, 1000) : c - 165.4508113861084 ms . uc - 404.22773361206055 ms
320
warmup (320, 1000) : c - 177.21891403198242 ms . uc - 422.9471683502197 ms
321
warmup (321, 1000) : c - 197.77250289916992 ms . uc - 402.3151397705078 ms
322
warmup (322, 1000) : c - 145.71332931518555 ms . uc - 382.5061321258545 ms
323
warmup (323, 1000) : c - 159.16991233825684 ms . uc - 384.81950759887695 ms
324
warmup (324, 1000) : c - 147.5851535797119 ms . uc - 364.2010688781738 ms
325
warmup (325, 1000) : c - 152.11796760559082 ms . uc - 393.0373191833496 ms
326
warmup (326, 1000) : c - 174.7887134552002 ms . uc - 383.2433223724365 ms
327
warmup (327, 1000) : c - 141.26968383789062 ms . uc - 

warmup (419, 1000) : c - 144.1352367401123 ms . uc - 398.5598087310791 ms
420
warmup (420, 1000) : c - 145.71714401245117 ms . uc - 440.02652168273926 ms
421
warmup (421, 1000) : c - 140.0587558746338 ms . uc - 403.20515632629395 ms
422
warmup (422, 1000) : c - 169.36802864074707 ms . uc - 393.5582637786865 ms
423
warmup (423, 1000) : c - 159.21449661254883 ms . uc - 390.35773277282715 ms
424
warmup (424, 1000) : c - 157.51051902770996 ms . uc - 380.11693954467773 ms
425
warmup (425, 1000) : c - 159.2557430267334 ms . uc - 435.80150604248047 ms
426
warmup (426, 1000) : c - 221.7116355895996 ms . uc - 393.7666416168213 ms
427
warmup (427, 1000) : c - 151.49784088134766 ms . uc - 374.8459815979004 ms
428
warmup (428, 1000) : c - 164.3989086151123 ms . uc - 449.47195053100586 ms
429
warmup (429, 1000) : c - 150.69580078125 ms . uc - 396.80981636047363 ms
430
warmup (430, 1000) : c - 147.32933044433594 ms . uc - 384.1233253479004 ms
431
warmup (431, 1000) : c - 148.0262279510498 ms . uc - 

warmup (523, 1000) : c - 145.80607414245605 ms . uc - 431.41865730285645 ms
524
warmup (524, 1000) : c - 147.72629737854004 ms . uc - 392.9316997528076 ms
525
warmup (525, 1000) : c - 153.61738204956055 ms . uc - 376.7116069793701 ms
526
warmup (526, 1000) : c - 160.60996055603027 ms . uc - 362.8063201904297 ms
527
warmup (527, 1000) : c - 206.46262168884277 ms . uc - 385.56790351867676 ms
528
warmup (528, 1000) : c - 151.94368362426758 ms . uc - 420.8333492279053 ms
529
warmup (529, 1000) : c - 244.43292617797852 ms . uc - 383.4385871887207 ms
530
warmup (530, 1000) : c - 152.9521942138672 ms . uc - 431.15925788879395 ms
531
warmup (531, 1000) : c - 142.16995239257812 ms . uc - 412.57667541503906 ms
532
warmup (532, 1000) : c - 171.8885898590088 ms . uc - 429.5973777770996 ms
533
warmup (533, 1000) : c - 144.80090141296387 ms . uc - 418.8525676727295 ms
534
warmup (534, 1000) : c - 162.33468055725098 ms . uc - 399.7526168823242 ms
535
warmup (535, 1000) : c - 143.2805061340332 ms . uc

warmup (627, 1000) : c - 162.3513698577881 ms . uc - 387.2253894805908 ms
628
warmup (628, 1000) : c - 154.19840812683105 ms . uc - 412.36281394958496 ms
629
warmup (629, 1000) : c - 237.21933364868164 ms . uc - 379.6384334564209 ms
630
warmup (630, 1000) : c - 152.3120403289795 ms . uc - 421.4444160461426 ms
631
warmup (631, 1000) : c - 157.2399139404297 ms . uc - 374.4633197784424 ms
632
warmup (632, 1000) : c - 151.72767639160156 ms . uc - 388.9446258544922 ms
633
warmup (633, 1000) : c - 159.66057777404785 ms . uc - 375.368595123291 ms
634
warmup (634, 1000) : c - 144.51074600219727 ms . uc - 382.1742534637451 ms
635
warmup (635, 1000) : c - 150.9864330291748 ms . uc - 368.96371841430664 ms
636
warmup (636, 1000) : c - 191.60938262939453 ms . uc - 368.27826499938965 ms
637
warmup (637, 1000) : c - 150.97713470458984 ms . uc - 398.64230155944824 ms
638
warmup (638, 1000) : c - 155.3325653076172 ms . uc - 419.8732376098633 ms
639
warmup (639, 1000) : c - 155.21788597106934 ms . uc - 

warmup (731, 1000) : c - 145.33376693725586 ms . uc - 401.8890857696533 ms
732
warmup (732, 1000) : c - 162.82153129577637 ms . uc - 407.0460796356201 ms
733
warmup (733, 1000) : c - 142.89021492004395 ms . uc - 370.4357147216797 ms
734
warmup (734, 1000) : c - 151.0934829711914 ms . uc - 409.773588180542 ms
735
warmup (735, 1000) : c - 147.4630832672119 ms . uc - 431.3950538635254 ms
736
warmup (736, 1000) : c - 230.76748847961426 ms . uc - 416.0287380218506 ms
737
warmup (737, 1000) : c - 159.88683700561523 ms . uc - 397.613525390625 ms
738
warmup (738, 1000) : c - 162.400484085083 ms . uc - 390.6514644622803 ms
739
warmup (739, 1000) : c - 148.8351821899414 ms . uc - 391.53218269348145 ms
740
warmup (740, 1000) : c - 175.91500282287598 ms . uc - 375.78296661376953 ms
741
warmup (741, 1000) : c - 164.90864753723145 ms . uc - 354.2649745941162 ms
742
warmup (742, 1000) : c - 162.0793342590332 ms . uc - 382.6301097869873 ms
743
warmup (743, 1000) : c - 153.42450141906738 ms . uc - 347.

KeyboardInterrupt: 