## Setup

In [1]:
!python3 -m  pip install mlc-ai-cpu -f https://mlc.ai/wheels

Looking in links: https://mlc.ai/wheels
Collecting mlc-ai-cpu
  Downloading https://github.com/mlc-ai/package/releases/download/v0.9.dev0/mlc_ai_cpu-0.17.2-cp310-cp310-manylinux_2_28_x86_64.whl (185.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m185.8/185.8 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mlc-ai-cpu
Successfully installed mlc-ai-cpu-0.17.2


In [2]:
# Fetch the LPRNet project (branch `dev_chirag`); later cells import `data.load_data` and
# `model.LPRNet` from it and read `./weights` and `./data/test` relative to its root.
! git clone -b dev_chirag https://github.com/chiragbheemaiah/LPRNet_CSC591.git

Cloning into 'LPRNet_CSC591'...
remote: Enumerating objects: 1094, done.[K
remote: Counting objects: 100% (57/57), done.[K
remote: Compressing objects: 100% (29/29), done.[K
remote: Total 1094 (delta 37), reused 40 (delta 28), pack-reused 1037 (from 1)[K
Receiving objects: 100% (1094/1094), 21.34 MiB | 9.89 MiB/s, done.
Resolving deltas: 100% (47/47), done.


In [3]:
cd LPRNet_CSC591

/content/LPRNet_CSC591


In [4]:
import tvm
import torch.nn as nn
import torch

## Test Function

In [17]:
from data.load_data import CHARS, CHARS_DICT, LPRDataLoader
from PIL import Image, ImageDraw, ImageFont
from model.LPRNet import build_lprnet
# import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import *
from torch import optim
import torch.nn as nn
import numpy as np
import argparse
import torch
import time
import cv2
import os

def collate_fn(batch):
    """Merge dataset samples into a single batch for the DataLoader.

    Args:
        batch: iterable of ``(img, label, length)`` triples, where ``img`` is a
            NumPy array, ``label`` is a sequence of class indices and
            ``length`` is the number of entries in ``label``.

    Returns:
        A tuple ``(images, labels, lengths)``: the images stacked along a new
        leading axis, every label concatenated into one flat float32 tensor,
        and the per-sample label lengths as a list (used by callers to split
        the flat label tensor again).
    """
    image_tensors, flat_labels, label_lengths = [], [], []
    for img, label, length in batch:
        image_tensors.append(torch.from_numpy(img))
        flat_labels.extend(label)
        label_lengths.append(length)

    label_array = np.asarray(flat_labels).flatten().astype(np.float32)
    stacked_images = torch.stack(image_tensors, 0)
    return (stacked_images, torch.from_numpy(label_array), label_lengths)

def test(module):
    """Evaluate ``module`` on the test images configured in the global ``args``.

    ``args.test_img_dirs`` is a comma-separated list of directories (``~`` is
    expanded); the dataset is built with ``args.img_size`` and
    ``args.lpr_max_len`` and handed to ``Greedy_Decode_Eval``, which prints the
    results. Nothing is returned.
    """
    image_dirs = os.path.expanduser(args.test_img_dirs).split(',')
    dataset = LPRDataLoader(image_dirs, args.img_size, args.lpr_max_len)
    Greedy_Decode_Eval(module, dataset, args)

def _ctc_greedy_decode(scores, blank):
    """Greedy-decode one ``(num_classes, time_steps)`` score matrix.

    Takes the arg-max class at every time step, collapses consecutive repeats
    and drops ``blank``. A blank between two identical classes keeps both.
    """
    decoded = []
    previous = blank
    for current in np.argmax(scores, axis=0):
        if current != blank and current != previous:
            decoded.append(current)
        previous = current
    return decoded


def Greedy_Decode_Eval(module, datasets, args):
    """Run a compiled TVM graph-executor module over a dataset and print accuracy.

    Args:
        module: object exposing ``set_input`` / ``run`` / ``get_output`` (the
            notebook passes a ``graph_executor.GraphModule``).
        datasets: dataset yielding ``(img, label, length)`` samples, batched
            with ``collate_fn``.
        args: namespace providing ``test_batch_size`` and ``num_workers``.

    Only ``len(datasets) // args.test_batch_size`` full batches are evaluated.
    The input tensor name is read from the notebook-level ``input_name``.

    Prints the per-batch output shape, then accuracy as
    ``[correct : wrong length : wrong characters : total]`` and the average
    wall-clock time per dataset image. Returns nothing.
    """
    blank = len(CHARS) - 1  # the last CHARS entry is treated as the blank label
    epoch_size = len(datasets) // args.test_batch_size
    batch_iterator = iter(DataLoader(datasets, args.test_batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=collate_fn))

    Tp = 0
    Tn_1 = 0
    Tn_2 = 0
    t1 = time.time()
    for _ in range(epoch_size):
        images, labels, lengths = next(batch_iterator)

        # Split the flat label tensor back into one array per plate. Kept as a
        # list: plates may differ in length, and a ragged np.array() is an error
        # on recent NumPy.
        targets = []
        start = 0
        for length in lengths:
            targets.append(labels[start:start + length].numpy())
            start += length

        # The TVM module consumes host NumPy data, so the batch always stays on
        # the CPU (moving it to CUDA first would make .numpy() fail).
        module.set_input(input_name, tvm.nd.array(images.cpu().numpy()))
        module.run()

        prebs = module.get_output(0).asnumpy()
        print("Output shape:", prebs.shape)

        for target, preb in zip(targets, prebs):
            label = _ctc_greedy_decode(preb, blank)
            if len(label) != len(target):
                Tn_1 += 1
            elif (np.asarray(target) == np.asarray(label)).all():
                Tp += 1
            else:
                Tn_2 += 1
    t2 = time.time()

    total = Tp + Tn_1 + Tn_2
    # No full batch (dataset smaller than test_batch_size) means nothing was scored.
    Acc = Tp * 1.0 / total if total else 0.0
    print("[Info] Test Accuracy: {} [{}:{}:{}:{}]".format(Acc, Tp, Tn_1, Tn_2, total))
    print("[Info] Test Speed: {}s 1/{}]".format((t2 - t1) / len(datasets), len(datasets)))


## JIT Trace NN Module

In [18]:
class small_basic_block(nn.Module):
    """Four-convolution bottleneck: 1x1 reduce, 3x1, 1x3, 1x1 expand.

    The three inner stages operate on ``ch_out // 4`` channels. A ReLU follows
    every convolution except the last. Padding on the 3x1 and 1x3 stages keeps
    the spatial size unchanged.
    """

    def __init__(self, ch_in, ch_out):
        super().__init__()
        mid = ch_out // 4
        layers = [
            nn.Conv2d(ch_in, mid, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(mid, mid, kernel_size=(3, 1), padding=(1, 0)),
            nn.ReLU(),
            nn.Conv2d(mid, mid, kernel_size=(1, 3), padding=(0, 1)),
            nn.ReLU(),
            nn.Conv2d(mid, ch_out, kernel_size=1),
        ]
        # Submodule is still named `block` with indices 0..6, so state-dict keys are unchanged.
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the bottleneck to ``x`` and return the result."""
        return self.block(x)

class LPRNet(nn.Module):
    """License-plate recognition network: a convolutional backbone plus a 1x1 fusion conv.

    Args:
        lpr_max_len: stored on the instance; not used by ``forward``.
        phase: stored on the instance; not used by ``forward``.
        class_num: number of output classes (size of output dim 1).
        dropout_rate: probability used by the two ``nn.Dropout`` layers.

    ``forward`` returns a tensor whose dim 1 has ``class_num`` entries; the
    notebook's recorded runs show ``(100, 68, 18)`` for a ``(100, 3, 24, 94)``
    input with ``class_num=68``.
    """

    def __init__(self, lpr_max_len, phase, class_num, dropout_rate):
        super().__init__()
        self.phase = phase
        self.lpr_max_len = lpr_max_len
        self.class_num = class_num

        # Order and indices matter: `forward` taps outputs by index and the
        # pretrained checkpoint addresses these layers as `backbone.<index>`.
        # NOTE(review): the MaxPool3d layers with a first stride of 2 / 4 appear
        # to be what shrinks 128 -> 64 and 256 -> 64 channels ahead of the
        # `ch_in=64` / `in_channels=64` layers that follow them — confirm.
        layers = [
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1),  # 0
            nn.BatchNorm2d(num_features=64),  # 1
            nn.ReLU(),  # 2 (tapped)
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1)),  # 3
            small_basic_block(ch_in=64, ch_out=128),  # 4
            nn.BatchNorm2d(num_features=128),  # 5
            nn.ReLU(),  # 6 (tapped)
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(2, 1, 2)),  # 7
            small_basic_block(ch_in=64, ch_out=256),  # 8
            nn.BatchNorm2d(num_features=256),  # 9
            nn.ReLU(),  # 10
            small_basic_block(ch_in=256, ch_out=256),  # 11
            nn.BatchNorm2d(num_features=256),  # 12
            nn.ReLU(),  # 13 (tapped)
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(4, 1, 2)),  # 14
            nn.Dropout(dropout_rate),  # 15
            nn.Conv2d(in_channels=64, out_channels=256, kernel_size=(1, 4), stride=1),  # 16
            nn.BatchNorm2d(num_features=256),  # 17
            nn.ReLU(),  # 18
            nn.Dropout(dropout_rate),  # 19
            nn.Conv2d(in_channels=256, out_channels=class_num, kernel_size=(13, 1), stride=1),  # 20
            nn.BatchNorm2d(num_features=class_num),  # 21
            nn.ReLU(),  # 22 (tapped)
        ]
        self.backbone = nn.Sequential(*layers)

        # 448 = 64 + 128 + 256: channel counts of the first three tapped outputs;
        # the fourth tapped output contributes `class_num` channels.
        fusion = nn.Conv2d(
            in_channels=448 + self.class_num,
            out_channels=self.class_num,
            kernel_size=(1, 1),
            stride=(1, 1),
        )
        self.container = nn.Sequential(fusion)

    def forward(self, x):
        """Run the backbone, fuse the four tapped feature maps and average over dim 2."""
        tapped = []
        for idx, layer in enumerate(self.backbone.children()):
            x = layer(x)
            if idx in (2, 6, 13, 22):
                tapped.append(x)

        scaled = []
        for pos, feat in enumerate(tapped):
            # Pool the earlier taps so all four can be concatenated on dim 1.
            if pos in (0, 1):
                feat = nn.AvgPool2d(kernel_size=5, stride=5)(feat)
            elif pos == 2:
                feat = nn.AvgPool2d(kernel_size=(4, 10), stride=(4, 2))(feat)
            # Divide by the mean of the squared activations, taken over the whole tensor.
            mean_square = torch.mean(torch.pow(feat, 2))
            scaled.append(torch.div(feat, mean_square))

        fused = self.container(torch.cat(scaled, 1))
        return torch.mean(fused, dim=2)

def build_lprnet(lpr_max_len=8, phase=False, class_num=66, dropout_rate=0.5):
    """Construct an ``LPRNet`` and put it in the requested mode.

    The network is returned in training mode only when ``phase == "train"``;
    any other value (including the default ``False``) yields evaluation mode.
    This definition shadows the ``build_lprnet`` imported from ``model.LPRNet``
    earlier in the notebook.
    """
    net = LPRNet(lpr_max_len, phase, class_num, dropout_rate)
    return net.train() if phase == "train" else net.eval()

In [6]:
# Build the 68-class network and load the pretrained weights on the CPU.
lprnet = build_lprnet(lpr_max_len=8, phase=False, class_num=68, dropout_rate=0.5)
# weights_only=True restricts unpickling to tensors/plain containers, so a tampered
# checkpoint cannot execute arbitrary code (and the torch.load FutureWarning goes away).
state_dict = torch.load(
    "./weights/Final_LPRNet_model.pth",
    map_location=torch.device('cpu'),
    weights_only=True,
)
lprnet.load_state_dict(state_dict)
lprnet.eval()

# Trace with one dummy image (batch 1, 3 channels, 24 x 94) and save the TorchScript module.
example_input = torch.randn(1, 3, 24, 94)
traced_model = torch.jit.trace(lprnet, example_input)
traced_model.save("lprnet_traced.pt")

  lprnet.load_state_dict(torch.load("./weights/Final_LPRNet_model.pth",  map_location=torch.device('cpu')))


In [7]:
import tvm
from tvm import relay
from tvm.contrib.download import download_testdata

# Load the TorchScript model
# Reload the trace saved as "lprnet_traced.pt"; this is the object handed to the Relay frontend below.
scripted_model = torch.jit.load("lprnet_traced.pt")
scripted_model.eval()

RecursiveScriptModule(
  original_name=LPRNet
  (backbone): RecursiveScriptModule(
    original_name=Sequential
    (0): RecursiveScriptModule(original_name=Conv2d)
    (1): RecursiveScriptModule(original_name=BatchNorm2d)
    (2): RecursiveScriptModule(original_name=ReLU)
    (3): RecursiveScriptModule(original_name=MaxPool3d)
    (4): RecursiveScriptModule(
      original_name=small_basic_block
      (block): RecursiveScriptModule(
        original_name=Sequential
        (0): RecursiveScriptModule(original_name=Conv2d)
        (1): RecursiveScriptModule(original_name=ReLU)
        (2): RecursiveScriptModule(original_name=Conv2d)
        (3): RecursiveScriptModule(original_name=ReLU)
        (4): RecursiveScriptModule(original_name=Conv2d)
        (5): RecursiveScriptModule(original_name=ReLU)
        (6): RecursiveScriptModule(original_name=Conv2d)
      )
    )
    (5): RecursiveScriptModule(original_name=BatchNorm2d)
    (6): RecursiveScriptModule(original_name=ReLU)
    (7): Recu

## Compile into Relay Module

In [8]:
from types import SimpleNamespace

# Evaluation settings, exposed as attributes (args.test_batch_size, ...).
# img_size is presumably (width, height): the traced input is 24 high and 94 wide.
args = SimpleNamespace(
    img_size=[94, 24],
    test_img_dirs="./data/test",
    dropout_rate=0,
    lpr_max_len=8,
    test_batch_size=100,
    phase_train=False,
    num_workers=8,
    cuda=False,
    show=False,
    pretrained_model='./weights/Final_LPRNet_model.pth',
)

In [9]:
# The compiled graph has a fixed input shape, so derive it from the evaluation
# settings instead of repeating the numbers: (batch, channels, height, width).
img_width, img_height = args.img_size
input_shape = (args.test_batch_size, 3, img_height, img_width)
input_name = "input0"
input_shapes = [(input_name, input_shape)]

# Convert to TVM Relay format
mod, params = relay.frontend.from_pytorch(scripted_model, input_shapes)

In [10]:
# Dump the Relay "main" function to inspect the converted graph (the output is very long).
print(mod["main"])

fn (%input0: Tensor[(100, 3, 24, 94), float32] /* span=aten::_convolution_0.input0:0:0 */, %aten::_convolution_0.weight: Tensor[(64, 3, 3, 3), float32] /* span=aten::_convolution_0.weight:0:0 */, %aten::_convolution_0.bias: Tensor[(64), float32] /* span=aten::_convolution_0.bias:0:0 */, %aten::batch_norm_0.weight: Tensor[(64), float32] /* span=aten::batch_norm_0.weight:0:0 */, %aten::batch_norm_0.bias: Tensor[(64), float32] /* span=aten::batch_norm_0.bias:0:0 */, %aten::batch_norm_0.running_mean: Tensor[(64), float32] /* span=aten::batch_norm_0.running_mean:0:0 */, %aten::batch_norm_0.running_var: Tensor[(64), float32] /* span=aten::batch_norm_0.running_var:0:0 */, %aten::_convolution_1.weight: Tensor[(32, 64, 1, 1), float32] /* span=aten::_convolution_1.weight:0:0 */, %aten::_convolution_1.bias: Tensor[(32), float32] /* span=aten::_convolution_1.bias:0:0 */, %aten::_convolution_2.weight: Tensor[(32, 32, 3, 1), float32] /* span=aten::_convolution_2.weight:0:0 */, %aten::_convolution_2.

In [11]:
# Define the target device
# With target "llvm" the expression below selects tvm.cpu(); only "cuda" would select GPU 0.
target = "llvm"
dev = tvm.cuda(0) if target == "cuda" else tvm.cpu()

# Compile the model
# Baseline build: the unmodified Relay module, compiled at opt_level=3.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)



In [14]:
from tvm.contrib import graph_executor
# Instantiate the compiled library on `dev`; `module` is what `test()` drives.
module = graph_executor.GraphModule(lib["default"](dev))

In [19]:
# Evaluate the baseline TVM build on the test set.
test(module)



Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
[Info] Test Accuracy: 0.898 [898:59:43:1000]
[Info] Test Speed: 0.04125318384170532s 1/1000]


## Manual Optimization

In [20]:
# Basic manual optimization passes
# Each pass is applied directly to the module, in this order, producing `mod_manual`;
# the original `mod` is left untouched.
# 1. Type inference, inference-time simplification and canonicalization
mod_manual = relay.transform.InferType()(mod)
mod_manual = relay.transform.SimplifyInference()(mod_manual)
mod_manual = relay.transform.CanonicalizeOps()(mod_manual)

# 2. Constant folding and merging of parallel conv2d branches
mod_manual = relay.transform.FoldConstant()(mod_manual)
mod_manual = relay.transform.CombineParallelConv2D()(mod_manual)

# 3. Layout Transformation (if applicable)
# This can help optimize convolution and other spatial operations
mod_manual = relay.transform.AlterOpLayout()(mod_manual)

# 4. Dead Code Elimination
mod_manual = relay.transform.DeadCodeElimination()(mod_manual)

# 5. Common-subexpression elimination (removes duplicated computation)
mod_manual = relay.transform.EliminateCommonSubexpr()(mod_manual)

# # 6. Advanced Optimization Passes (disabled)
# # NOTE(review): these lines refer to `mod_nn`, which is not defined in this notebook;
# # they would need to use `mod_manual` to be re-enabled.
# mod_nn = relay.transform.CombineParallelDense()(mod_nn)
# mod_nn = relay.transform.PlanDevices()(mod_nn)

In [21]:
# Define the target device
# With target "llvm" the expression below selects tvm.cpu(); only "cuda" would select GPU 0.
target = "llvm"
dev = tvm.cuda(0) if target == "cuda" else tvm.cpu()

# Compile the model
# Same build settings as the baseline, but starting from the manually transformed `mod_manual`.
# This rebinds `lib`, replacing the baseline library.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod_manual, target=target, params=params)

In [22]:
from tvm.contrib import graph_executor

# Create a graph executor
# Rebinds `module` to the library built from `mod_manual`.
module = graph_executor.GraphModule(lib["default"](dev))

In [23]:
# Evaluate the build that went through the manual pass pipeline.
test(module)

Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
[Info] Test Accuracy: 0.899 [899:59:42:1000]
[Info] Test Speed: 0.04035191869735718s 1/1000]


## Auto Tuning

In [None]:
import tvm.auto_scheduler as auto_scheduler
from tvm.autotvm.tuner import XGBTuner
from tvm import autotvm

In [None]:
# Measurement settings forwarded unchanged to autotvm.LocalRunner below.
number = 10
repeat = 1
min_repeat_ms = 0  # since we're tuning on a CPU, can be set to 0
timeout = 10  # in seconds

# create a TVM runner
# The runner measures candidate schedules on the local machine.
# enable_cpu_cache_flush=True presumably flushes the CPU cache between
# measurements — confirm against the LocalRunner documentation.
runner = autotvm.LocalRunner(
    number=number,
    repeat=repeat,
    timeout=timeout,
    min_repeat_ms=min_repeat_ms,
    enable_cpu_cache_flush=True,
)

In [None]:
# Settings consumed by the tuning loop and by the tuned build further down.
# "trials" caps the number of configurations tried per task and
# "tuning_records" is the log file the results are written to.
# NOTE(review): "early_stopping" (100) is larger than "trials" (20), so it presumably never triggers.
tuning_option = {
    "tuner": "xgb",
    "trials": 20,
    "early_stopping": 100,
    "measure_option": autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"), runner=runner
    ),
    "tuning_records": "lprnet-autotuning.json",
}

In [None]:
# Extract the tunable tasks from the Relay module (converted from the traced PyTorch model).
tasks = autotvm.task.extract_from_program(mod["main"], target=target, params=params)

# Tune the extracted tasks sequentially.
for i, task in enumerate(tasks):
    prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

    # Take the tuner from the configuration instead of hard-coding it here.
    tuner = tuning_option["tuner"]
    if tuner == "xgb":
        tuner_obj = XGBTuner(task, loss_type="reg")
    else:
        raise ValueError("Invalid tuner: " + tuner)

    # A task's search space can be smaller than the requested number of trials;
    # use the same capped value for tuning and for the progress bar total so the
    # bar can actually reach 100%.
    n_trial = min(tuning_option["trials"], len(task.config_space))

    tuner_obj.tune(
        n_trial=n_trial,
        early_stopping=tuning_option["early_stopping"],
        measure_option=tuning_option["measure_option"],
        callbacks=[
            autotvm.callback.progress_bar(n_trial, prefix=prefix),
            autotvm.callback.log_to_file(tuning_option["tuning_records"]),
        ],
    )

[Task  1/13]  Current/Best:    7.57/  14.67 GFLOPS | Progress: (20/20) | 59.73 s Done.
[Task  2/13]  Current/Best:    5.85/  16.11 GFLOPS | Progress: (20/20) | 56.82 s Done.
[Task  3/13]  Current/Best:   11.94/  12.54 GFLOPS | Progress: (20/20) | 75.65 s Done.
[Task  4/13]  Current/Best:   17.76/  17.76 GFLOPS | Progress: (20/20) | 56.15 s Done.
[Task  5/13]  Current/Best:    6.73/  15.62 GFLOPS | Progress: (20/20) | 88.26 s Done.
[Task  6/13]  Current/Best:   13.15/  14.57 GFLOPS | Progress: (20/20) | 48.68 s Done.
[Task  7/13]  Current/Best:    4.97/  14.86 GFLOPS | Progress: (20/20) | 104.96 s Done.
[Task  8/13]  Current/Best:    9.69/  17.46 GFLOPS | Progress: (20/20) | 89.51 s Done.
[Task  9/13]  Current/Best:   14.74/  15.28 GFLOPS | Progress: (20/20) | 133.46 s Done.
[Task 10/13]  Current/Best:    5.09/  12.42 GFLOPS | Progress: (20/20) | 133.30 s Done.
[Task 11/13]  Current/Best:   16.12/  16.70 GFLOPS | Progress: (20/20) | 132.83 s Done.
[Task 12/13]  Current/Best:    5.45/  1

In [None]:
# Rebuild the original Relay module (`mod`, the one the tasks were extracted from)
# with the tuning log applied during compilation.
with autotvm.apply_history_best(tuning_option["tuning_records"]):
    with tvm.transform.PassContext(opt_level=3, config={}):
        lib = relay.build(mod, target=target, params=params)

# Rebind `dev` and `module` to the tuned build; `graph_executor` comes from an earlier cell's import.
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))

In [None]:
# Evaluate the auto-tuned build.
test(module)

Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
[Info] Test Accuracy: 0.901 [901:57:42:1000]
[Info] Test Speed: 0.03988617014884949s 1/1000]


## Base Performance

In [None]:
# Run the repository's own test script as the reference point for the TVM results above.
# NOTE(review): the absolute /content path assumes the repo was cloned under /content (e.g. Colab).
! python /content/LPRNet_CSC591/test_LPRNet.py

Successful to build network!
  lprnet.load_state_dict(torch.load(args.pretrained_model, map_location=torch.device('cpu')))
load pretrained model successful!
[Info] Test Accuracy: 0.896 [896:62:42:1000]
[Info] Test Speed: 0.22646905851364135s 1/1000]


In [None]:
# Save the module (includes weights/parameters)
# `lib` is whichever build ran last; in top-to-bottom order that is the auto-tuned one.
module_path = "module.tar"
lib.export_library(module_path)
print(f"Module saved to {module_path}")


Module saved to module.tar


## Test Function - Single Image

In [None]:
# Class-index -> character table; a class index is a position in this list.
# This rebinds the CHARS / CHARS_DICT names imported from data.load_data earlier.
# The last entry ('-') is the one the decoders below treat as the blank label
# (they compare against len(CHARS) - 1).
CHARS = ['京', '沪', '津', '渝', '冀', '晋', '蒙', '辽', '吉', '黑',
         '苏', '浙', '皖', '闽', '赣', '鲁', '豫', '鄂', '湘', '粤',
         '桂', '琼', '川', '贵', '云', '藏', '陕', '甘', '青', '宁',
         '新',
         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
         'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
         'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
         'W', 'X', 'Y', 'Z', 'I', 'O', '-'
         ]

# Reverse lookup: character -> class index.
CHARS_DICT = {char:i for i, char in enumerate(CHARS)}
# Displayed as the cell output: the number of classes.
len(CHARS)

In [None]:
from PIL import Image
import numpy as np
import torch
from torchvision.transforms import functional as TF

def preprocess_image(image_path, img_size=(94, 24)):
    """Load an image file and turn it into a ``(1, 3, H, W)`` float32 tensor.

    Args:
        image_path: path of the image to open with PIL.
        img_size: size passed to ``PIL.Image.resize`` (width, height).

    Returns:
        A tensor with a leading batch axis of 1, channels first, and pixel
        values mapped as ``(p - 127.5) * 0.0078125``.

    NOTE(review): the image is decoded as RGB with PIL's default resampling;
    confirm this matches the channel order and resize used by the dataset loader.
    """
    rgb_image = Image.open(image_path).convert('RGB').resize(img_size)
    pixels = np.array(rgb_image).astype('float32')
    pixels = (pixels - 127.5) * 0.0078125  # roughly [-1, 1]
    channels_first = np.transpose(pixels, (2, 0, 1))  # HWC -> CHW
    return torch.tensor(channels_first).unsqueeze(0)

def test_single_image(model, image_tensor):
    """
    Test a single image with the TVM-built model and decode the output.
    Args:
        model: The compiled TVM model.
        image_tensor (torch.Tensor): Preprocessed image tensor.
    """
    # Forward pass
    prebs = model(image_tensor)  # Pass the image tensor through the model
    prebs = prebs.cpu().detach().numpy()  # Convert the output to NumPy array

    preb_labels = []  # Initialize list to store final decoded labels
    preb = prebs[0]  # Extract predictions for the single image in the batch

    # Greedy decoding: Iterate over each time-step (over sequence length)
    preb_label = []
    for j in range(preb.shape[1]):  # Iterate over sequence positions (time steps)
        preb_label.append(np.argmax(preb[:, j], axis=0))  # Get the most likely class at each position

    # Remove repeated labels and blank labels
    no_repeat_blank_label = []
    pre_c = -1  # Initialize previous character as -1 (no character)

    for c in preb_label:
        if c == len(CHARS) - 1:  # Blank character (no prediction)
            continue
        if c == pre_c:  # Skip repeated labels
            continue
        no_repeat_blank_label.append(c)
        pre_c = c  # Update previous character

    # Decode characters from the predicted label indices
    predicted_text = ''.join([CHARS[c] for c in no_repeat_blank_label])  # Convert indices to characters
    print("Predicted Labels (Indices):", no_repeat_blank_label)
    print("Predicted Text:", predicted_text)

# Example usage:
image_path = "/content/LPRNet_CSC591/data/test/沪AMS087.jpg"
image_tensor = preprocess_image(image_path)
# `test_single_image` calls the model directly, so pass the PyTorch network loaded
# earlier (`lprnet`); the previous `model` name was never defined in this notebook.
test_single_image(lprnet, image_tensor)


In [None]:
# Set input and run - ignore
# The compiled graph has a fixed batch size (input_shape[0]), so repeat the single
# preprocessed image to fill a whole batch; row 0 of the output is that image's result.
input_data = image_tensor.repeat(input_shape[0], 1, 1, 1)
module.set_input(input_name, tvm.nd.array(input_data.numpy()))
module.run()

# Get output
tvm_output = module.get_output(0).asnumpy()
print("Output shape:", tvm_output.shape)

In [None]:
# ignore
prebs = tvm_output
preb = prebs[0]  # Extract predictions for the first image in the batch
blank = len(CHARS) - 1  # the last CHARS entry is the blank label

# Greedy decoding: most likely class at each time step
preb_label = np.argmax(preb, axis=0)

# Remove repeated labels and blank labels. The previous label is updated on
# blanks as well, so a doubled character separated by a blank is kept twice.
no_repeat_blank_label = []
pre_c = blank
for c in preb_label:
    if c != blank and c != pre_c:
        no_repeat_blank_label.append(int(c))
    pre_c = c

# Decode characters from the predicted label indices
predicted_text = ''.join([CHARS[c] for c in no_repeat_blank_label])  # Convert indices to characters
print("Predicted Labels (Indices):", no_repeat_blank_label)
print("Predicted Text:", predicted_text)

In [None]:
import timeit

# Time `module.run()`: `timing_repeat` measurements of `timing_number` runs each,
# converted to milliseconds per run.
timing_number = 10
timing_repeat = 10
run_times_ms = (
    np.array(timeit.Timer(lambda: module.run()).repeat(repeat=timing_repeat, number=timing_number))
    * 1000
    / timing_number
)
optimized = {"mean": np.mean(run_times_ms), "median": np.median(run_times_ms), "std": np.std(run_times_ms)}


print("optimized: %s" % (optimized))
# No baseline timing is computed anywhere in this notebook, so only print it when
# an `unoptimized` summary has been produced beforehand (avoids a NameError).
if "unoptimized" in globals():
    print("unoptimized: %s" % (unoptimized))

In [None]:
# Set input and run
# The compiled graph has a fixed batch size (input_shape[0]), so repeat the single
# preprocessed image to fill a whole batch; row 0 of the output is that image's result.
input_data = image_tensor.repeat(input_shape[0], 1, 1, 1)
module.set_input(input_name, tvm.nd.array(input_data.numpy()))
module.run()

# Get output
tvm_output = module.get_output(0).asnumpy()
print("Output shape:", tvm_output.shape)

In [None]:
prebs = tvm_output
preb = prebs[0]  # Extract predictions for the first image in the batch
blank = len(CHARS) - 1  # the last CHARS entry is the blank label

# Greedy decoding: most likely class at each time step
preb_label = np.argmax(preb, axis=0)

# Remove repeated labels and blank labels. The previous label is updated on
# blanks as well, so a doubled character separated by a blank is kept twice.
no_repeat_blank_label = []
pre_c = blank
for c in preb_label:
    if c != blank and c != pre_c:
        no_repeat_blank_label.append(int(c))
    pre_c = c

# Decode characters from the predicted label indices
predicted_text = ''.join([CHARS[c] for c in no_repeat_blank_label])  # Convert indices to characters
print("Predicted Labels (Indices):", no_repeat_blank_label)
print("Predicted Text:", predicted_text)