## 1. Libraries

In [1]:
import torch
# import torch.neuron
import torch_neuron

import torchvision
from torchvision import transforms as T
from PIL import Image
import os
from tqdm import tqdm
import time
# Image pipeline fed to the recognizer: bicubic resize to 32x128, conversion to
# a tensor, then normalization with mean 0.5 and std 0.5.
preprocess_parseq = T.Compose([
    T.Resize(size=(32, 128), interpolation=T.InterpolationMode.BICUBIC),
    T.ToTensor(),
    T.Normalize(mean=0.5, std=0.5),
])

# Pretrained PARSeq fetched through torch.hub and switched to eval mode.
parseq = torch.hub.load('baudm/parseq', 'parseq', pretrained=True).eval()

Using cache found in /home/ubuntu/.cache/torch/hub/baudm_parseq_main


In [2]:
# Show the PyTorch version this notebook was run against.
torch.__version__

'1.12.1+cu102'

In [6]:
from pathlib import PurePath
from typing import Sequence
from torch import nn
import yaml

device = torch.device("cpu")
# Resolve the checkpoint relative to the active torch.hub cache rather than a
# user-specific absolute path, so the cell also works for other users and for a
# non-default TORCH_HOME. The name is kept as `chkpt_dir` even though it points
# at a file.
chkpt_dir = os.path.join(torch.hub.get_dir(), "checkpoints", "parseq-bb5792a6.pt")
# NOTE(review): torch.load unpickles the file; only load checkpoints that come
# from a trusted source.
checkpoint = torch.load(chkpt_dir, map_location=device)

def _load_yaml(path):
    """Parse the YAML file at ``path`` and return the resulting object."""
    with open(path, 'r') as stream:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects;
        # only point this at config files you trust.
        return yaml.load(stream, yaml.Loader)


def _get_config(experiment="parseq"):
    """Emulates hydra config resolution.

    Layers the YAML files in this order, later ones overriding earlier ones:
    ``configs/main.yaml`` -> ``configs/charset/94_full.yaml`` ->
    ``configs/model/<base>_trace.yaml`` -> the experiment's own ``model`` section.

    Args:
        experiment: name of the file under ``configs/experiment/`` (without
            the ``.yaml`` suffix).

    Returns:
        dict of model keyword arguments, with ``lr`` cast to ``float``.
    """
    # parents[2] of this relative path is '.', so every config path below is
    # resolved against the current working directory.
    root = PurePath("strhub/models/utils.py").parents[2]

    config = _load_yaml(root / 'configs/main.yaml')['model']
    config.update(_load_yaml(root / 'configs/charset/94_full.yaml')['model'])

    exp = _load_yaml(root / f'configs/experiment/{experiment}.yaml')

    # Apply base model config
    base_model = exp['defaults'][0]['override /model']
    config.update(_load_yaml(root / f'configs/model/{base_model}_trace.yaml'))

    # Apply experiment config
    if 'model' in exp:
        config.update(exp['model'])

    # Workaround for now: manually cast the lr to the correct type.
    config['lr'] = float(config['lr'])
    return config

def _get_model_class(key):
    if 'abinet' in key:
        from strhub.models.abinet.system import ABINet as ModelClass
    elif 'crnn' in key:
        from strhub.models.crnn.system import CRNN as ModelClass
    elif 'parseq' in key:
        from strhub.models.parseq.system import PARSeq as ModelClass
    elif 'trba' in key:
        from strhub.models.trba.system import TRBA as ModelClass
    elif 'trbc' in key:
        from strhub.models.trba.system import TRBC as ModelClass
    elif 'vitstr' in key:
        from strhub.models.vitstr.system import ViTSTR as ModelClass
    else:
        raise InvalidModelError("Unable to find model class for '{}'".format(key))
    return ModelClass


def create_model():
    """Build a PARSeq model from the local configs and load the checkpoint.

    Reads the module-level ``checkpoint`` as the state dict.

    Returns:
        The model in eval mode with the checkpoint weights loaded.
    """
    kwargs = _get_config("parseq")
    model_cls = _get_model_class("parseq")
    net = model_cls(**kwargs)
    net = net.eval()
    net.load_state_dict(checkpoint)
    return net

# Vocabulary: 94 printable characters plus three special tokens.
charset = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
BOS = '[B]'
EOS = '[E]'
PAD = '[P]'
# EOS takes index 0; BOS and PAD are appended after the regular characters.
specials_first = (EOS,)
specials_last = (BOS, PAD)
# index -> token and token -> index lookups (itos is built once; the original
# recomputed the identical tuple a second time).
itos = specials_first + tuple(charset) + specials_last
stoi = {s: i for i, s in enumerate(itos)}
eos_id, bos_id, pad_id = [stoi[s] for s in specials_first + specials_last]

def tokenizer_filter(probs, ids):
    """Cut a greedy prediction at the first EOS token.

    Args:
        probs: per-position probabilities; only sliced here.
        ids: per-position token ids; must provide ``.tolist()``.

    Returns:
        ``(probs, ids)`` where ``ids`` is a Python list holding everything
        before the first EOS, and ``probs`` keeps one extra position so the
        EOS probability (when an EOS exists) is included.
    """
    id_list = ids.tolist()
    # With no EOS present there is nothing to truncate.
    cut = id_list.index(eos_id) if eos_id in id_list else len(id_list)
    return probs[:cut + 1], id_list[:cut]

def ids2tok(token_ids):
    """Map token ids through ``itos`` and concatenate the results into one string."""
    return ''.join(itos[idx] for idx in token_ids)

def decode(token_dists):
    """Greedily decode a batch of token distributions.

    Args:
        token_dists: softmax probabilities over the token distribution. Shape: N, L, C

    Returns:
        A pair ``(labels, probs)``: the list of decoded strings (arbitrary
        length) and, per sequence, the probabilities of the kept positions
        as returned by ``tokenizer_filter``.
    """
    # Take the most likely token per position, then truncate each sequence at EOS.
    filtered = [tokenizer_filter(*dist.max(-1)) for dist in token_dists]
    batch_tokens = [ids2tok(kept_ids) for _, kept_ids in filtered]
    batch_probs = [kept_probs for kept_probs, _ in filtered]
    return batch_tokens, batch_probs

# Same pipeline as the one declared in the first cell, re-declared here:
# bicubic resize to 32x128, tensor conversion, normalization with mean/std 0.5.
preprocess_parseq = T.Compose([
    T.Resize(size=(32, 128), interpolation=T.InterpolationMode.BICUBIC),
    T.ToTensor(),
    T.Normalize(mean=0.5, std=0.5),
])

In [7]:
parseq_model = create_model()

In [8]:
# model_parallel = torch.neuron.DataParallel(neuron_model)

# Compare the locally rebuilt model (parseq_model) against the torch.hub model
# (parseq) on every .jpg in the demo folder, batch by batch.
img_folder = "/home/ubuntu/parseq/digits_demo"
batch_size = 8

img_paths = [os.path.join(img_folder, x) for x in os.listdir(img_folder) if x.endswith("jpg")]

N = len(img_paths)
# Ceiling division. The previous `N//batch_size + 1` together with the `<`
# test sized the holder to 0 rows whenever N was an exact multiple of
# batch_size, and the image copy below then failed with an IndexError.
n_batch = (N + batch_size - 1) // batch_size

for i in range(n_batch):
    # Slicing clamps at N, so the final batch simply comes out shorter.
    img_paths_tmp = img_paths[i * batch_size:(i + 1) * batch_size]
    # Model expects a batch of images with shape: (B, C, H, W)
    input_holder = torch.zeros(len(img_paths_tmp), 3, 32, 128)

    print(input_holder.size())
    for j, img_path in enumerate(img_paths_tmp):
        # The context manager closes the file once the image is preprocessed.
        with Image.open(img_path) as img_file:
            input_holder[j] = preprocess_parseq(img_file.convert('RGB'))

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        # end_1 - start : time spent in parseq_model (locally rebuilt)
        # end_2 - end_1 : time spent in parseq (torch.hub)
        start = time.time()
        logits_ts = parseq_model(input_holder)
        end_1 = time.time()
        logits = parseq(input_holder)
        end_2 = time.time()

    pred = logits.softmax(-1)
    label, confidence = decode(pred)

    pred = logits_ts.softmax(-1)
    label_ts, confidence = decode(pred)

    print(label_ts, label)


torch.Size([8, 3, 32, 128])
['448', '398', '464', '422', '464', '597', '3.34', '3.77'] ['448', '398', '464', '422', '464', '597', '3.34', '3.77']
torch.Size([8, 3, 32, 128])
['597', "'13N", '498', '647', '314', '4#', '224', '348'] ['597', "'13N", '498', '647', '314', '4#', '224', '348']
torch.Size([8, 3, 32, 128])
['464', '427', '698', '464', '458', '558', '1087', '464'] ['464', '427', '698', '464', '458', '558', '1087', '464']
torch.Size([8, 3, 32, 128])
['297', '472', '298', '98', '674', '387', '422', '467'] ['297', '472', '298', '98', '674', '387', '422', '467']
torch.Size([8, 3, 32, 128])
['538', '538', '547', '397', '474', '497', '997', '497'] ['538', '538', '547', '397', '474', '497', '997', '497']
torch.Size([8, 3, 32, 128])
['224', '417', '417', '538', '497', '494', '497', '$496'] ['224', '417', '417', '538', '497', '494', '497', '$496']
torch.Size([8, 3, 32, 128])
['1897', '5.00', '$224', '594', '382', '538', '697', '414'] ['1897', '5.00', '$224', '594', '382', '538', '697', '

In [12]:
output_path = "parseq_cpu_trace.pt"
# Trace the rebuilt model with one random 1x3x32x128 example input.
# NOTE(review): file_path is passed, so the traced module is presumably also
# written to output_path -- confirm against the to_torchscript documentation.
trace_example = torch.rand(size=(1, 3, 32, 128))
ts_model = parseq_model.to_torchscript(
    file_path=output_path,
    method="trace",
    example_inputs=trace_example,
)

print(ts_model)



PARSeq(
  original_name=PARSeq
  (encoder): Encoder(
    original_name=Encoder
    (patch_embed): PatchEmbed(
      original_name=PatchEmbed
      (proj): Conv2d(original_name=Conv2d)
      (norm): Identity(original_name=Identity)
    )
    (pos_drop): Dropout(original_name=Dropout)
    (norm_pre): Identity(original_name=Identity)
    (blocks): Sequential(
      original_name=Sequential
      (0): Block(
        original_name=Block
        (norm1): LayerNorm(original_name=LayerNorm)
        (attn): Attention(
          original_name=Attention
          (qkv): Linear(original_name=Linear)
          (attn_drop): Dropout(original_name=Dropout)
          (proj): Linear(original_name=Linear)
          (proj_drop): Dropout(original_name=Dropout)
        )
        (ls1): Identity(original_name=Identity)
        (drop_path1): Identity(original_name=Identity)
        (norm2): LayerNorm(original_name=LayerNorm)
        (mlp): Mlp(
          original_name=Mlp
          (fc1): Linear(original_name

In [13]:
# Start from every operator the Neuron compiler reports as supported, then
# exclude aten::view so it is not compiled for Neuron.
allowed_ops = set(torch.neuron.get_supported_operations())
# discard() instead of remove(): if this SDK version does not list aten::view,
# it is already excluded and there is no reason to fail with a KeyError.
allowed_ops.discard("aten::view")


neuron_model = torch.neuron.trace(
    ts_model,
    compiler_workdir="./workdir",
    example_inputs=torch.rand(size=(1, 3, 32, 128)),
    op_whitelist=allowed_ops,
)


  "The input to trace is already a ScriptModule, tracing it is a no-op. Returning the object as is."
INFO:Neuron:There are 119 ops of 4 different types in the TorchScript that are not compiled by neuron-cc: aten::view, aten::baddbmm, aten::embedding, aten::index_put_, (For more information see https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/compiler/neuron-cc/neuron-cc-ops/neuron-cc-ops-pytorch.html)
INFO:Neuron:Number of arithmetic operators (pre-compilation) before = 1897, fused = 1524, percent fused = 80.34%


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


INFO:Neuron:Compiling function _NeuronGraph$1337 with neuron-cc; log file is at /home/ubuntu/parseq/workdir/0/graph_def.neuron-cc.log
INFO:Neuron:Compiling with command line: '/home/ubuntu/env_aws_neuron_pytorch/bin/neuron-cc compile /home/ubuntu/parseq/workdir/0/graph_def.pb --framework TENSORFLOW --pipeline compile SaveTemps --output /home/ubuntu/parseq/workdir/0/graph_def.neff --io-config {"inputs": {"0:0": [[1, 3, 32, 128], "float32"]}, "outputs": ["aten_detach/Const:0", "aten_cat/concat:0", "aten_slice_6/StridedSlice:0", "aten_slice_10/StridedSlice:0", "aten_slice_14/StridedSlice:0", "aten_slice_18/StridedSlice:0", "aten_slice_22/StridedSlice:0", "aten_slice_26/StridedSlice:0", "aten_slice_30/StridedSlice:0", "aten_slice_34/StridedSlice:0", "aten_slice_38/StridedSlice:0", "aten_slice_42/StridedSlice:0", "aten_slice_46/StridedSlice:0", "aten_slice_50/StridedSlice:0", "aten_slice_54/StridedSlice:0", "aten_slice_58/StridedSlice:0", "aten_slice_62/StridedSlice:0", "aten_slice_66/Strid




INFO:Neuron:Compiling function _NeuronGraph$1338 with neuron-cc; log file is at /home/ubuntu/parseq/workdir/51/graph_def.neuron-cc.log
INFO:Neuron:Compiling with command line: '/home/ubuntu/env_aws_neuron_pytorch/bin/neuron-cc compile /home/ubuntu/parseq/workdir/51/graph_def.pb --framework TENSORFLOW --pipeline compile SaveTemps --output /home/ubuntu/parseq/workdir/51/graph_def.neff --io-config {"inputs": {}, "outputs": ["aten_triu/Select:0", "Decoder_132/DecoderLayer_3/MultiheadAttention_14/aten_unsqueeze/ExpandDims:0", "Decoder_222/DecoderLayer_3/MultiheadAttention_14/aten_unsqueeze/ExpandDims:0", "Decoder_312/DecoderLayer_3/MultiheadAttention_14/aten_unsqueeze/ExpandDims:0", "Decoder_402/DecoderLayer_3/MultiheadAttention_14/aten_unsqueeze/ExpandDims:0", "Decoder_492/DecoderLayer_3/MultiheadAttention_14/aten_unsqueeze/ExpandDims:0", "Decoder_582/DecoderLayer_3/MultiheadAttention_14/aten_unsqueeze/ExpandDims:0", "Decoder_672/DecoderLayer_3/MultiheadAttention_14/aten_unsqueeze/ExpandDi

In [None]:
# Persist the Neuron-compiled module so later sessions can reload it without recompiling.
torch.jit.save(neuron_model,'parseq_model_traced_neuron_new.pt')

In [7]:
# Reload the saved Neuron module. In the recorded run this call failed with
# "The PyTorch Neuron Runtime could not be initialized" (see the output below).
neuron_model = torch.jit.load('parseq_model_traced_neuron_new.pt')

2023-Mar-16 23:27:19.0305  7392:7392  ERROR   HAL:aws_hal_tpb_pooling_write_profile       failed programming the engine

2023-Mar-16 23:27:19.0309  7392:7392  ERROR   HAL:aws_hal_tpb_pooling_write_profile       failed programming the engine

2023-Mar-16 23:27:19.0313  7392:7392  ERROR   HAL:aws_hal_tpb_pooling_write_profile       failed programming the engine

2023-Mar-16 23:27:19.0318  7392:7392  ERROR   HAL:aws_hal_tpb_pooling_write_profile       failed programming the engine

2023-Mar-16 23:27:19.0322  7392:7392  ERROR   HAL:aws_hal_tpb_pooling_write_profile       failed programming the engine

2023-Mar-16 23:27:19.0326  7392:7392  ERROR   HAL:aws_hal_tpb_pooling_write_profile       failed programming the engine

2023-Mar-16 23:27:19.0330  7392:7392  ERROR   HAL:aws_hal_tpb_pooling_write_profile       failed programming the engine

2023-Mar-16 23:27:19.0335  7392:7392  ERROR   HAL:aws_hal_tpb_pooling_write_profile       failed programming the engine

2023-Mar-16 23:27:19.0339  7392:

RuntimeError: The PyTorch Neuron Runtime could not be initialized. Neuron Driver issues are logged
to your system logs. See the Neuron Runtime's troubleshooting guide for help on this
topic: https://awsdocs-neuron.readthedocs-hosted.com/en/latest/

In [None]:

# Compare the Neuron-compiled model (wrapped in DataParallel) against the
# torch.hub model (parseq) on every .jpg in the demo folder, batch by batch.
# Relies on batch_size and preprocess_parseq defined in earlier cells.
model_parallel = torch.neuron.DataParallel(neuron_model)

img_folder = "/home/ubuntu/parseq/digits_demo"

img_paths = [os.path.join(img_folder, x) for x in os.listdir(img_folder) if x.endswith("jpg")]

N = len(img_paths)
# Ceiling division. The previous `N//batch_size + 1` together with the `<`
# test sized the holder to 0 rows whenever N was an exact multiple of
# batch_size, and the image copy below then failed with an IndexError.
n_batch = (N + batch_size - 1) // batch_size

for i in range(n_batch):
    # Slicing clamps at N, so the final batch simply comes out shorter.
    img_paths_tmp = img_paths[i * batch_size:(i + 1) * batch_size]
    # Model expects a batch of images with shape: (B, C, H, W)
    input_holder = torch.zeros(len(img_paths_tmp), 3, 32, 128)

    print(input_holder.size())
    for j, img_path in enumerate(img_paths_tmp):
        # The context manager closes the file once the image is preprocessed.
        with Image.open(img_path) as img_file:
            input_holder[j] = preprocess_parseq(img_file.convert('RGB'))

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        # end_1 - start : time spent in model_parallel (Neuron)
        # end_2 - end_1 : time spent in parseq (torch.hub)
        start = time.time()
        logits_neuron = model_parallel(input_holder)
        end_1 = time.time()
        logits = parseq(input_holder)
        end_2 = time.time()

    pred = logits.softmax(-1)
    label, confidence = parseq.tokenizer.decode(pred)

    pred = logits_neuron.softmax(-1)
    label_neuron, confidence = parseq.tokenizer.decode(pred)

    print(label_neuron, label)



In [None]:
# Number of .jpg images found in the demo folder.
len(img_paths)