# Creates a model archive file to be used in the torchserve for deployment

## Downloads the model from the s3 link 

In [3]:
 # Download the model and rename it to pytorch_model.bin, then move it to model folder
!wget http://dl.fbaipublicfiles.com/dynabench/nli/bert_round1.pt

--2020-06-29 11:16:05--  http://dl.fbaipublicfiles.com/dynabench/nli/bert_round1.pt
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 104.22.74.142, 104.22.75.142, 172.67.9.4, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|104.22.74.142|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1340686343 (1.2G) [application/octet-stream]
Saving to: ‘bert_round1.pt’


2020-06-29 11:16:24 (67.6 MB/s) - ‘bert_round1.pt’ saved [1340686343/1340686343]



In [1]:
## Installs torchserve and torch-model-archiver to be used in this kernel
!pip install torchserve torch-model-archiver

[31mfastai 1.0.60 requires nvidia-ml-py3, which is not installed.[0m
[33mYou are using pip version 10.0.1, however version 20.2b1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


## Model Archiving

In [2]:
# Creates session to get the defualt bucket name to push the tar file
import boto3, time, json, sagemaker
sess = boto3.Session()
sagemaker_session = sagemaker.Session(boto_session=sess)

model_file_name = "nli_r1_1"
bucket_name = sagemaker_session.default_bucket()
prefix = 'torchserve'


In [3]:
#Make sure you have the following files in the model folder NliTransformerHandler.py, settings.py, setup_config.json,214d4777e8e3eb234563136cd3a49f6bc34131de836848454373fa43f10adc5e.abfbb80ee795a608acbf35c7bf2d2d58574df3887cdd94b355fc67e03fddba05, 9b3c03a36e83b13d5ba95ac965c9f9074a99e14340c523ab405703179e79fc46.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084.json"

In [19]:
#Place the model (.pt) and settings file in the appropriate round*_* folder. For eg: round2_1 is for round2's first model
#Before executing the torch-model-archiver, Change the model name, serialized file path of the model and also the settings file path to the appropriate round*_* folder 

In [12]:
!torch-model-archiver --model-name nli_r1_1 --version 1.0 --serialized-file model/round1_1/pytorch_model.pt --handler model/NliTransformerHandler.py --extra-files "model/round1_1/settings.py,./model/setup_config.json,model/214d4777e8e3eb234563136cd3a49f6bc34131de836848454373fa43f10adc5e.abfbb80ee795a608acbf35c7bf2d2d58574df3887cdd94b355fc67e03fddba05,model/9b3c03a36e83b13d5ba95ac965c9f9074a99e14340c523ab405703179e79fc46.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084.json"

In [13]:
#The mar file will be present in the home directory
#This creates a tar file to be used in the sagemaker deployment

In [14]:
!tar cvfz {model_file_name}.tar.gz {model_file_name}.mar

nli_r1_1.mar


In [17]:
#Moves the tar file to mars folder
!mv {model_file_name}.tar.gz {model_file_name}.mar ./mars/

In [18]:
# change the folder name before the last slash based on the task
!aws s3 cp mars/{model_file_name}.tar.gz s3://{bucket_name}/{prefix}/models/nli/
#The below s3 link will be given as the model data for sagemaker while deployment

upload: mars/nli_r1_1.tar.gz to s3://sagemaker-us-west-1-096166425824/torchserve/models/nli/nli_r1_1.tar.gz


## Getting the default state dicts using BertTokenizer, BertModel, BertAdam from pytorch_pretrained_bert

In [None]:
!pip3 install pytorch_pretrained_bert

In [7]:
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertAdam

In [3]:
# Place the state_dicts inside model folder of nli

In [9]:
BertTokenizer.from_pretrained("bert-large-uncased", do_lower_case=True,
                                                            cache_dir=".")

100%|██████████| 231508/231508 [00:00<00:00, 1233418.56B/s]


<pytorch_pretrained_bert.tokenization.BertTokenizer at 0x7f4ddb498320>

In [10]:
BertModel.from_pretrained("bert-large-uncased", cache_dir=".")

100%|██████████| 1248501532/1248501532 [00:27<00:00, 44637113.79B/s]


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 1024, padding_idx=0)
    (position_embeddings): Embedding(512, 1024)
    (token_type_embeddings): Embedding(2, 1024)
    (LayerNorm): BertLayerNorm()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=1024, out_features=1024, bias=True)
            (key): Linear(in_features=1024, out_features=1024, bias=True)
            (value): Linear(in_features=1024, out_features=1024, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=1024, out_features=1024, bias=True)
            (LayerNorm): BertLayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (intermediate): BertIntermediate(
          (

In [4]:
## To unzip the anli folder which will be placed inside docker in the Create_sagemaker_torchserve_AMI notebook

In [22]:
!sudo yum install unzip

Loaded plugins: dkms-build-requires, priorities, update-motd, upgrade-helper,
              : versionlock
amzn-main                                                | 2.1 kB     00:00     
amzn-updates                                             | 3.8 kB     00:00     
Package unzip-6.0-4.10.amzn1.x86_64 already installed and latest version
Nothing to do


In [23]:
!unzip anli.zip -d .

Archive:  anli.zip
   creating: ./anli/
  inflating: ./anli/.DS_Store        
   creating: ./__MACOSX/
   creating: ./__MACOSX/anli/
  inflating: ./__MACOSX/anli/._.DS_Store  
   creating: ./anli/.git/
   creating: ./anli/.git/branches/
   creating: ./__MACOSX/anli/.git/
  inflating: ./__MACOSX/anli/.git/._branches  
  inflating: ./anli/.git/config      
  inflating: ./__MACOSX/anli/.git/._config  
  inflating: ./anli/.git/description  
  inflating: ./__MACOSX/anli/.git/._description  
  inflating: ./anli/.git/HEAD        
  inflating: ./__MACOSX/anli/.git/._HEAD  
   creating: ./anli/.git/hooks/
  inflating: ./anli/.git/hooks/applypatch-msg.sample  
   creating: ./__MACOSX/anli/.git/hooks/
  inflating: ./__MACOSX/anli/.git/hooks/._applypatch-msg.sample  
  inflating: ./anli/.git/hooks/commit-msg.sample  
  inflating: ./__MACOSX/anli/.git/hooks/._commit-msg.sample  
  inflating: ./anli/.git/hooks/fsmonitor-watchman.sample  
  inflating: ./__MACOSX/anli/.git/hooks

  inflating: ./__MACOSX/anli/src/py_scripts/._qualification_ops  
  inflating: ./__MACOSX/anli/src/._py_scripts  
   creating: ./anli/src/roberta_model/
 extracting: ./anli/src/roberta_model/__init__.py  
   creating: ./__MACOSX/anli/src/roberta_model/
  inflating: ./__MACOSX/anli/src/roberta_model/.___init__.py  
   creating: ./anli/src/roberta_model/__pycache__/
  inflating: ./anli/src/roberta_model/__pycache__/__init__.cpython-37.pyc  
  inflating: ./anli/src/roberta_model/__pycache__/nli_training.cpython-37.pyc  
  inflating: ./anli/src/roberta_model/nli_training.py  
  inflating: ./__MACOSX/anli/src/roberta_model/._nli_training.py  
  inflating: ./__MACOSX/anli/src/._roberta_model  
   creating: ./anli/src/utils/
 extracting: ./anli/src/utils/__init__.py  
   creating: ./__MACOSX/anli/src/utils/
  inflating: ./__MACOSX/anli/src/utils/.___init__.py  
   creating: ./anli/src/utils/__pycache__/
  inflating: ./anli/src/utils/__pycache__/__init__.cpython-37.pyc  
  in