## Installation & Setup

In [1]:
!python3 -m  pip install mlc-ai-cpu -f https://mlc.ai/wheels

Looking in links: https://mlc.ai/wheels
Collecting mlc-ai-cpu
  Downloading https://github.com/mlc-ai/package/releases/download/v0.9.dev0/mlc_ai_cpu-0.17.2-cp310-cp310-manylinux_2_28_x86_64.whl (185.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m185.8/185.8 MB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mlc-ai-cpu
Successfully installed mlc-ai-cpu-0.17.2


In [2]:
# Clone the course-project repository; later cells change into it and use
# paths relative to its root (./data, ./weights, ./module.tar).
! git clone https://github.com/Yogesh31Hasabe/NCSU-CSC_591-RealTime_AI_and_Machine_Learning_Systems-CourseProject-LPRNet.git

Cloning into 'NCSU-CSC_591-RealTime_AI_and_Machine_Learning_Systems-CourseProject-LPRNet'...
remote: Enumerating objects: 1027, done.
remote: Counting objects: 100% (1027/1027), done.
remote: Compressing objects: 100% (1022/1022), done.
remote: Total 1027 (delta 4), reused 1021 (delta 2), pack-reused 0 (from 0)
Receiving objects: 100% (1027/1027), 19.08 MiB | 25.92 MiB/s, done.
Resolving deltas: 100% (4/4), done.


In [3]:
cd NCSU-CSC_591-RealTime_AI_and_Machine_Learning_Systems-CourseProject-LPRNet

/content/NCSU-CSC_591-RealTime_AI_and_Machine_Learning_Systems-CourseProject-LPRNet


In [7]:
import tvm
from tvm import relay
from tvm.runtime import load_param_dict
from tvm.contrib import graph_executor
import torch
from data.load_data import CHARS, CHARS_DICT, LPRDataLoader
from PIL import Image, ImageDraw, ImageFont
from model.LPRNet import build_lprnet
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import *
from torch import optim
import torch.nn as nn
import numpy as np
import argparse
import torch
import time
import cv2
import os

## Baseline Model Accuracy

In [4]:
! python /content/NCSU-CSC_591-RealTime_AI_and_Machine_Learning_Systems-CourseProject-LPRNet/test_LPRNet.py

Successful to build network!
  lprnet.load_state_dict(torch.load(args.pretrained_model, map_location=torch.device('cpu')))
load pretrained model successful!
[Info] Test Accuracy: 0.897 [897:61:42:1000]
[Info] Test Speed: 0.2118074233531952s 1/1000]


In [5]:
from types import SimpleNamespace

# Evaluation settings, read by the cells below as attributes (args.<name>).
# Built directly as a namespace so `args` never changes type within the cell.
args = SimpleNamespace(
    img_size=[94, 24],
    test_img_dirs="./data/test",
    dropout_rate=0,
    lpr_max_len=8,
    test_batch_size=100,
    phase_train=False,
    num_workers=2,
    cuda=False,
    show=False,
    pretrained_model='./weights/Final_LPRNet_model.pth',
)

## Combining Model & MLC Optimizations: `Pruning` & `Auto-Tuning`

In [8]:
# Target string used to choose the execution device.
target = "llvm"

# A "cuda" target runs on GPU 0; every other target falls back to the CPU.
if target == "cuda":
    dev = tvm.cuda(0)
else:
    dev = tvm.cpu()

In [9]:
# Shape of one input batch. The two spatial sizes come from args.img_size
# ([94, 24]) in reverse order rather than repeating the literals 24 and 94, so
# the shape cannot drift from the configured image size.
# NOTE(review): presumably (batch, channels, height, width) — confirm against
# how the module was compiled.
input_shape = (args.test_batch_size, 3, args.img_size[1], args.img_size[0])

# Name of the graph input that receives the image batch during evaluation.
input_name = "input0"

In [10]:
from collections import OrderedDict
import tvm
from tvm.contrib import graph_executor
from tvm.relay import save_param_dict

# Load the compiled module
module_path = "./module.tar"
loaded_lib = tvm.runtime.load_module(module_path)

# Create a GraphModule runtime on the device that matches `target`, instead of
# a hard-coded "llvm" that would silently override the earlier device choice.
dev = tvm.device(target, 0)
runtime_module = graph_executor.GraphModule(loaded_lib["default"](dev))

# Load PyTorch pruned weights.
# map_location keeps the load working on CPU-only machines even if the file was
# saved from a GPU; weights_only limits unpickling to tensors and plain
# containers, which is all this cell reads, and avoids the torch.load FutureWarning.
pth_file = "./weights/lprnet_model_optimization_pruning.pth"  # Path to pruned weights
torch_weights = torch.load(pth_file, map_location="cpu", weights_only=True)

# Convert every non-empty tensor to a TVM NDArray (tensors with zero elements
# are skipped; zero-valued pruned entries are kept as they are).
new_params = {
    key: tvm.nd.array(value.detach().cpu().numpy())
    for key, value in torch_weights.items()
    if value.numel() > 0
}
print("Pruned weights converted for TVM.")

# NOTE(review): load_params appears to ignore names that the compiled graph
# does not expose as inputs, so list them up front rather than reporting
# success blindly — confirm against the graph executor documentation.
unmatched_keys = [key for key in new_params if runtime_module.get_input_index(key) < 0]

# Serialize the pruned weights into a binary format
param_bytes = save_param_dict(new_params)

# Replace the weights in the runtime using serialized weights
runtime_module.load_params(param_bytes)
if unmatched_keys:
    print(
        f"Warning: {len(unmatched_keys)} of {len(new_params)} weight tensors "
        "have no matching graph input and were probably not applied."
    )
print("Pruned weights replaced successfully.")

Pruned weights converted for TVM.
Pruned weights replaced successfully.


  torch_weights = torch.load(pth_file)


## Test Function 1: Accuracy & Speed

In [11]:
def collate_fn(batch):
    """Merge (image, label, length) samples into a single batch.

    Returns a 3-tuple: the images stacked along a new leading dimension, every
    label value concatenated into one flat float32 tensor, and the list of
    per-sample label lengths (needed to split the flat labels again).
    """
    image_tensors = [torch.from_numpy(image) for image, _, _ in batch]
    flat_values = [value for _, label, _ in batch for value in label]
    label_lengths = [length for _, _, length in batch]

    flat_labels = np.asarray(flat_values).flatten().astype(np.float32)
    stacked_images = torch.stack(image_tensors, 0)
    return (stacked_images, torch.from_numpy(flat_labels), label_lengths)

def test(module):
    """Build the test dataset from the global `args` and evaluate `module` on it.

    args.test_img_dirs may hold several comma-separated directories; a leading
    `~` is expanded before the string is split.
    """
    image_dirs = os.path.expanduser(args.test_img_dirs).split(',')
    dataset = LPRDataLoader(image_dirs, args.img_size, args.lpr_max_len)
    Greedy_Decode_Eval(module, dataset, args)

def _ctc_greedy_decode(logits, blank_index):
    """Turn raw network output into one label sequence per sample.

    `logits` is indexed as [sample, class, step]; the class axis is reduced
    with argmax. Within each sample, consecutive repeats of the same class are
    merged and entries equal to `blank_index` are dropped.

    Returns a list with one list of class indices per sample.
    """
    best_path = np.argmax(logits, axis=1)
    decoded = []
    for path in best_path:
        sequence = []
        previous = blank_index
        for current in path:
            # Keep a class only when it is not blank and starts a new run.
            if current != blank_index and current != previous:
                sequence.append(current)
            previous = current
        decoded.append(sequence)
    return decoded


def Greedy_Decode_Eval(module, datasets, args):
    """Evaluate a TVM graph module on `datasets` and print accuracy and speed.

    Parameters:
        module: runtime object providing set_input / run / get_output.
        datasets: dataset whose samples are compatible with `collate_fn`.
        args: settings object; reads `test_batch_size` and `num_workers`.

    Only full batches are evaluated; a trailing partial batch is dropped, as
    in the original loop over `len(datasets) // test_batch_size` batches.

    Prints "[Info] Test Accuracy" with the counters
    [correct : length mismatches : content mismatches : total] and
    "[Info] Test Speed" as seconds per evaluated sample.

    Raises:
        ValueError: if the dataset holds fewer samples than one batch, since
            nothing would be evaluated and the accuracy would be undefined.
    """
    batch_size = args.test_batch_size
    num_batches = len(datasets) // batch_size
    if num_batches == 0:
        raise ValueError(
            "Dataset has {} samples, fewer than test_batch_size={}; nothing to evaluate.".format(
                len(datasets), batch_size
            )
        )

    # drop_last yields exactly `num_batches` full batches. Shuffling is not
    # needed for evaluation, so it is turned off.
    loader = DataLoader(
        datasets,
        batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        collate_fn=collate_fn,
        drop_last=True,
    )

    # The last entry of CHARS is treated as the blank symbol.
    blank_index = len(CHARS) - 1

    Tp = 0
    Tn_1 = 0
    Tn_2 = 0
    t1 = time.time()
    for images, labels, lengths in loader:
        # Split the flat label tensor back into one array per sample. The
        # targets stay in a plain list because label lengths can differ, and
        # a ragged list cannot be packed into one NumPy array.
        flat_labels = labels.numpy()
        targets = []
        start = 0
        for length in lengths:
            targets.append(flat_labels[start:start + length])
            start += length

        # The batch is handed to TVM as a host NumPy array, so no CUDA copy
        # of the torch tensor is made here.
        module.set_input(input_name, tvm.nd.array(images.numpy()))
        module.run()

        tvm_output = module.get_output(0).asnumpy()
        print("Output shape:", tvm_output.shape)

        predictions = _ctc_greedy_decode(tvm_output, blank_index)
        for predicted, target in zip(predictions, targets):
            if len(predicted) != len(target):
                Tn_1 += 1
            elif (np.asarray(target) == np.asarray(predicted)).all():
                Tp += 1
            else:
                Tn_2 += 1
    # Stop the clock before printing so the report is not part of the timing.
    t2 = time.time()

    total = Tp + Tn_1 + Tn_2
    Acc = Tp * 1.0 / total
    print("[Info] Test Accuracy: {} [{}:{}:{}:{}]".format(Acc, Tp, Tn_1, Tn_2, total))
    # Average over the samples actually evaluated, not the whole dataset size.
    print("[Info] Test Speed: {}s 1/{}]".format((t2 - t1) / total, total))


In [12]:
# Evaluate the TVM runtime module built above on the test set; test() prints
# the accuracy and per-sample speed.
print('Testing the model after Combined Optimization \n')
test(runtime_module)
print("\n")

Testing the model after Combined Optimization 

Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
Output shape: (100, 68, 18)
[Info] Test Accuracy: 0.9 [900:61:39:1000]
[Info] Test Speed: 0.03238273191452026s 1/1000]




## Test Function 2: Size

In [17]:
def print_size_of_module(module_path, module_name="Module"):
    """Print the on-disk size of `module_path` in kilobytes (1 KB = 1000 bytes).

    `module_name` is only used as the label in the printed line. If the path
    does not exist, a "not found" message is printed instead.
    """
    if not os.path.exists(module_path):
        print(f"{module_name} not found at {module_path}")
        return

    size_in_kb = os.path.getsize(module_path) / 1e3
    print(f"{module_name} Size (KB): {size_in_kb:.2f}")


In [18]:
# Report the on-disk size of the compiled module archive.
print('Size of the Combined Optimized Module:')
module_path = "./module.tar"
print_size_of_module(module_path, module_name="Combined Optimized Module")

Size of the Combined Optimized Module:
Combined Optimized Module Size (KB): 850.87
