## Installation & Setup

In [1]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
import torch
torch.backends.quantized.engine = 'fbgemm'

Looking in indexes: https://download.pytorch.org/whl/cpu


In [2]:
! git clone https://github.com/Yogesh31Hasabe/NCSU-CSC_591-RealTime_AI_and_Machine_Learning_Systems-CourseProject-LPRNet.git

Cloning into 'NCSU-CSC_591-RealTime_AI_and_Machine_Learning_Systems-CourseProject-LPRNet'...
remote: Enumerating objects: 1019, done.[K
remote: Counting objects: 100% (1019/1019), done.[K
remote: Compressing objects: 100% (1015/1015), done.[K
remote: Total 1019 (delta 1), reused 1015 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (1019/1019), 17.81 MiB | 19.09 MiB/s, done.
Resolving deltas: 100% (1/1), done.


In [3]:
cd NCSU-CSC_591-RealTime_AI_and_Machine_Learning_Systems-CourseProject-LPRNet

/content/NCSU-CSC_591-RealTime_AI_and_Machine_Learning_Systems-CourseProject-LPRNet


In [5]:
from data.load_data import CHARS, CHARS_DICT, LPRDataLoader
from PIL import Image, ImageDraw, ImageFont
from model.LPRNet import build_lprnet
# import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import *
from torch import optim
import torch.nn as nn
import numpy as np
import argparse
import torch
import time
import cv2
import os
import copy
from types import SimpleNamespace
from collections import OrderedDict

In [6]:
args = {
    'img_size': [94, 24],
    'test_img_dirs': "./data/test",
    'dropout_rate': 0,
    'lpr_max_len': 8,
    'test_batch_size': 100,
    'phase_train': False,
    'num_workers': 2,
    'cuda': False,
    'show': False,
    'pretrained_model': './weights/Final_LPRNet_model.pth'
}

args = SimpleNamespace(**args)

In [7]:
lprnet = build_lprnet(lpr_max_len=args.lpr_max_len, phase=args.phase_train, class_num=len(CHARS), dropout_rate=args.dropout_rate)
device = torch.device("cuda:0" if args.cuda else "cpu")
lprnet.to(device)
print("Network Built Successfully !! \n")

Successful to build network!


In [8]:
# load pretrained model
if args.pretrained_model:
    lprnet.load_state_dict(torch.load(args.pretrained_model, map_location=torch.device('cpu')))
    print("Pretrained Model loaded successfully !!")
else:
    print("[Error] Can't found pretrained mode, please check!")

Pretrained Model loaded successfully !!


  lprnet.load_state_dict(torch.load(args.pretrained_model, map_location=torch.device('cpu')))


## Baseline Model Accuracy

In [4]:
! python test_LPRNet.py

Successful to build network!
  lprnet.load_state_dict(torch.load(args.pretrained_model, map_location=torch.device('cpu')))
load pretrained model successful!
[Info] Test Accuracy: 0.901 [901:61:38:1000]
[Info] Test Speed: 0.21121056842803956s 1/1000]


## Quantization

In [9]:
lprnet.eval()
lprnet.qconfig = torch.ao.quantization.get_default_qconfig('x86')
lprnet_quant = torch.ao.quantization.prepare(lprnet)

lprnet_quant



LPRNet(
  (backbone): Sequential(
    (0): Conv2d(
      3, 64, kernel_size=(3, 3), stride=(1, 1)
      (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
    )
    (1): BatchNorm2d(
      64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
    )
    (2): ReLU()
    (3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=0, dilation=1, ceil_mode=False)
    (4): small_basic_block(
      (block): Sequential(
        (0): Conv2d(
          64, 32, kernel_size=(1, 1), stride=(1, 1)
          (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
        )
        (1): ReLU()
        (2): Conv2d(
          32, 32, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)
          (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
        )
        (3): ReLU()
        (4): Conv2d(
          32, 32, kernel_size=(1, 3), stride=(1, 1), padding=

### Quantize

In [10]:
lprnet_with_quant = torch.ao.quantization.convert(lprnet_quant)



In [11]:
lprnet_with_quant

LPRNet(
  (backbone): Sequential(
    (0): QuantizedConv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), scale=1.0, zero_point=0)
    (1): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=0, dilation=1, ceil_mode=False)
    (4): small_basic_block(
      (block): Sequential(
        (0): QuantizedConv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0)
        (1): ReLU()
        (2): QuantizedConv2d(32, 32, kernel_size=(3, 1), stride=(1, 1), scale=1.0, zero_point=0, padding=(1, 0))
        (3): ReLU()
        (4): QuantizedConv2d(32, 32, kernel_size=(1, 3), stride=(1, 1), scale=1.0, zero_point=0, padding=(0, 1))
        (5): ReLU()
        (6): QuantizedConv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0)
      )
    )
    (5): QuantizedBatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Re

In [12]:
# Print the weights matrix of the model after quantization
print('Weights after quantization')
print(torch.int_repr(lprnet_with_quant.backbone[0].weight()[0][0]))

Weights after quantization
tensor([[ 83, 127,  27],
        [ 40,  27,  17],
        [ 20,  37, -11]], dtype=torch.int8)


In [13]:
print('Original weights: ')
print(lprnet.backbone[0].weight[0][0])
print('')
print(f'Dequantized weights: ')
print(torch.dequantize(lprnet_with_quant.backbone[0].weight()[0][0]))
print('')

Original weights: 
tensor([[ 0.6627,  1.0154,  0.2187],
        [ 0.3205,  0.2122,  0.1328],
        [ 0.1622,  0.2951, -0.0841]], grad_fn=<SelectBackward0>)

Dequantized weights: 
tensor([[ 0.6610,  1.0115,  0.2150],
        [ 0.3186,  0.2150,  0.1354],
        [ 0.1593,  0.2947, -0.0876]])



## Test Function - 1 : Size

In [14]:
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp_delme.p")
    print('Size (KB):', os.path.getsize("temp_delme.p")/1e3)
    os.remove('temp_delme.p')

In [15]:
print('Size of the model before quantization')
print_size_of_model(lprnet)
print('Size of the model after quantization')
print_size_of_model(lprnet_with_quant)

Size of the model before quantization
Size (KB): 1816.738
Size of the model after quantization
Size (KB): 533.576


## Save Model Weights

In [16]:
torch.save(lprnet_with_quant.state_dict(), './weights/lprnet_model_optimization_quantization.pth')

## Test Function - 2 : Accuracy & Speed

In [18]:
def collate_fn(batch):
    imgs = []
    labels = []
    lengths = []
    for _, sample in enumerate(batch):
        img, label, length = sample
        imgs.append(torch.from_numpy(img))
        labels.extend(label)
        lengths.append(length)
    labels = np.asarray(labels).flatten().astype(np.float32)

    return (torch.stack(imgs, 0), torch.from_numpy(labels), lengths)

def Greedy_Decode_Eval(Net, datasets, args):
    # TestNet = Net.eval()
    epoch_size = len(datasets) // args.test_batch_size
    batch_iterator = iter(DataLoader(datasets, args.test_batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=collate_fn))

    Tp = 0
    Tn_1 = 0
    Tn_2 = 0
    t1 = time.time()
    for i in range(epoch_size):
        # load train data
        images, labels, lengths = next(batch_iterator)
        start = 0
        targets = []
        for length in lengths:
            label = labels[start:start+length]
            targets.append(label)
            start += length
        targets = np.array([el.numpy() for el in targets])
        imgs = images.numpy().copy()
        # print(imgs.shape)

        if args.cuda:
            images = Variable(images.cuda())
        else:
            images = Variable(images)

        # forward
        prebs = Net(images)
        # greedy decode
        prebs = prebs.cpu().detach().numpy()
        preb_labels = list()
        for i in range(prebs.shape[0]):
            preb = prebs[i, :, :]
            preb_label = list()
            for j in range(preb.shape[1]):
                preb_label.append(np.argmax(preb[:, j], axis=0))
            no_repeat_blank_label = list()
            pre_c = preb_label[0]
            if pre_c != len(CHARS) - 1:
                no_repeat_blank_label.append(pre_c)
            for c in preb_label: # dropout repeate label and blank label
                if (pre_c == c) or (c == len(CHARS) - 1):
                    if c == len(CHARS) - 1:
                        pre_c = c
                    continue
                no_repeat_blank_label.append(c)
                pre_c = c
            preb_labels.append(no_repeat_blank_label)
        for i, label in enumerate(preb_labels):
            # show image and its predict label
            # if args.show:
            #     show(imgs[i], label, targets[i])
            if len(label) != len(targets[i]):
                Tn_1 += 1
                continue
            if (np.asarray(targets[i]) == np.asarray(label)).all():
                Tp += 1
            else:
                Tn_2 += 1
    Acc = Tp * 1.0 / (Tp + Tn_1 + Tn_2)
    print("[Info] Test Accuracy: {} [{}:{}:{}:{}]".format(Acc, Tp, Tn_1, Tn_2, (Tp+Tn_1+Tn_2)))
    t2 = time.time()
    print("[Info] Test Speed: {}s 1/{}]".format((t2 - t1) / len(datasets), len(datasets)))

def test(model):
    test_img_dirs = os.path.expanduser(args.test_img_dirs)
    test_dataset = LPRDataLoader(test_img_dirs.split(','), args.img_size, args.lpr_max_len)
    Greedy_Decode_Eval(model, test_dataset, args)

### Testing LPRNet Accuracy with Dequantized Weights

In [19]:
lprnet_dequant = build_lprnet(lpr_max_len=args.lpr_max_len, phase=args.phase_train, class_num=len(CHARS), dropout_rate=args.dropout_rate)
device = torch.device("cuda:0" if args.cuda else "cpu")
lprnet_dequant.to(device)
print("Successfully built network!")

Successfully built network!


In [20]:
weights_path = './weights/lprnet_model_optimization_quantization.pth'
quantized_weights_state_dict = torch.load(weights_path)

  quantized_weights_state_dict = torch.load(weights_path)
  device=storage.device,


In [21]:
dequantized_state_dict = OrderedDict()
for key, value in quantized_weights_state_dict.items():
    if 'scale' in key or 'zero_point' in key:
        continue
    if value.is_quantized:
        dequantized_state_dict[key] = value.dequantize()
    else:
        dequantized_state_dict[key] = value

In [22]:
lprnet_dequant.load_state_dict(dequantized_state_dict)

<All keys matched successfully>

In [23]:
print('Testing the model after quantization')
test(lprnet_dequant)

Testing the model after quantization
[Info] Test Accuracy: 0.898 [898:59:43:1000]
[Info] Test Speed: 0.033904277086257935s 1/1000]
