## Setup

In [1]:
! git clone https://github.com/chiragbheemaiah/LPRNet_CSC591.git

Cloning into 'LPRNet_CSC591'...
remote: Enumerating objects: 1099, done.[K
remote: Counting objects: 100% (62/62), done.[K
remote: Compressing objects: 100% (33/33), done.[K
remote: Total 1099 (delta 39), reused 42 (delta 29), pack-reused 1037 (from 1)[K
Receiving objects: 100% (1099/1099), 21.34 MiB | 16.67 MiB/s, done.
Resolving deltas: 100% (49/49), done.


In [2]:
cd LPRNet_CSC591

/content/LPRNet_CSC591


## Baseline Accuracy

In [3]:
! python test_LPRNet.py

Successful to build network!
  lprnet.load_state_dict(torch.load(args.pretrained_model, map_location=torch.device('cpu')))
load pretrained model successful!
[Info] Test Accuracy: 0.899 [899:58:43:1000]
[Info] Test Speed: 0.2044245445728302s 1/1000]


# Model Preparation

In [4]:
from data.load_data import CHARS, CHARS_DICT, LPRDataLoader
from PIL import Image, ImageDraw, ImageFont
from model.LPRNet import build_lprnet
# import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import *
from torch import optim
import torch.nn as nn
import numpy as np
import argparse
import torch
import time
import cv2
import os
import copy
from types import SimpleNamespace
from collections import OrderedDict

In [5]:
args = {
    'img_size': [94, 24],
    'test_img_dirs': "./data/test",
    'dropout_rate': 0,
    'lpr_max_len': 8,
    'test_batch_size': 100,
    'phase_train': False,
    'num_workers': 8,
    'cuda': False,
    'show': False,
    'pretrained_model': './weights/pruned_model_weights_trial2.pth'
}

args = SimpleNamespace(**args)

In [6]:
import torch.nn as nn
import torch

class small_basic_block(nn.Module):
    def __init__(self, ch_in, ch_out):
        super(small_basic_block, self).__init__()
        self.block = nn.Sequential(
            nn.Conv2d(ch_in, ch_out // 4, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(3, 1), padding=(1, 0)),
            nn.ReLU(),
            nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(1, 3), padding=(0, 1)),
            nn.ReLU(),
            nn.Conv2d(ch_out // 4, ch_out, kernel_size=1),
        )
    def forward(self, x):
        return self.block(x)

class LPRNet(nn.Module):
    def __init__(self, lpr_max_len, phase, class_num, dropout_rate):
        super(LPRNet, self).__init__()
        self.phase = phase
        self.lpr_max_len = lpr_max_len
        self.class_num = class_num
        self.backbone = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1), # 0
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),  # 2
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1)),
            small_basic_block(ch_in=64, ch_out=128),    # *** 4 ***
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),  # 6
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(2, 1, 2)),
            small_basic_block(ch_in=64, ch_out=256),   # 8
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),  # 10
            small_basic_block(ch_in=256, ch_out=256),   # *** 11 ***
            nn.BatchNorm2d(num_features=256),   # 12
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(4, 1, 2)),  # 14
            nn.Dropout(dropout_rate),
            nn.Conv2d(in_channels=64, out_channels=256, kernel_size=(1, 4), stride=1),  # 16
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),  # 18
            nn.Dropout(dropout_rate),
            nn.Conv2d(in_channels=256, out_channels=class_num, kernel_size=(13, 1), stride=1), # 20
            nn.BatchNorm2d(num_features=class_num),
            nn.ReLU(),  # *** 22 ***
        )
        self.container = nn.Sequential(
            nn.Conv2d(in_channels=448+self.class_num, out_channels=self.class_num, kernel_size=(1, 1), stride=(1, 1)),
            # nn.BatchNorm2d(num_features=self.class_num),
            # nn.ReLU(),
            # nn.Conv2d(in_channels=self.class_num, out_channels=self.lpr_max_len+1, kernel_size=3, stride=2),
            # nn.ReLU(),
        )
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        print("in forward of lprnet")
        keep_features = list()
        x = self.quant(x)
        for i, layer in enumerate(self.backbone.children()):
            # print(f"Layer {i}: {layer}")  # Print layer information for debugging

            # Check if layer is MaxPool3d and print input shape before applying it
            if isinstance(layer, nn.MaxPool3d):
                # print(f"Input shape before MaxPool3d: {x.shape}")
                x = x.unsqueeze(0)  # Add a new dimension at index 2 to make it 5D

            x = layer(x)

            # Reshape back to 4D after MaxPool3d
            if isinstance(layer, nn.MaxPool3d):
              # print(f"Output shape after MaxPool3d: {x.shape}")

              x = x.squeeze(0)  # Remove the added dimension to make it 4D again
              # print(f"Output shape after MaxPool3d after squeeze: {x.shape}")

            if i in [2, 6, 13, 22]: # [2, 4, 8, 11, 22]
                keep_features.append(self.dequant(x))
        # requantize = False
        global_context = list()
        for i, f in enumerate(keep_features):
            if i in [0, 1]:
                f = nn.AvgPool2d(kernel_size=5, stride=5)(f)
            if i in [2]:
                f = nn.AvgPool2d(kernel_size=(4, 10), stride=(4, 2))(f)

            # if f.is_quantized:
            #   f = torch.dequantize(f)
            #   requantize = True
            f_pow = torch.pow(f, 2)
            f_mean = torch.mean(f_pow)
            f = torch.div(f, f_mean)
            global_context.append(f)
        x = self.dequant(x)
        # print(x.shape)
        x = torch.cat(global_context, 1)
        # print(x.shape)
        x = self.container(x)
        # print(x.shape)
        logits = torch.mean(x, dim=2)
        # print(logits.shape)
        # x = torch.cat(global_context, 1)
        # if requantize:
        #   x = torch.quantize_per_tensor(x, scale=0.0157, zero_point=64, dtype=torch.quint8) # Example with float, int, and dtype
        #   requantize = False
        # x = self.container(x)
        # logits = torch.mean(x, dim=2)

        return logits

def build_lprnet(lpr_max_len=8, phase=False, class_num=66, dropout_rate=0.5):

    Net = LPRNet(lpr_max_len, phase, class_num, dropout_rate)

    if phase == "train":
        return Net.train()
    else:
        return Net.eval()


In [7]:
lprnet = build_lprnet(lpr_max_len=args.lpr_max_len, phase=args.phase_train, class_num=len(CHARS), dropout_rate=args.dropout_rate)
device = torch.device("cuda:0" if args.cuda else "cpu")
lprnet.to(device)
print("Successful to build network!")

Successful to build network!


In [8]:
# load pretrained model
if args.pretrained_model:
    lprnet.load_state_dict(torch.load(args.pretrained_model, map_location=torch.device('cpu')))
    print("load pretrained model successful!")
else:
    print("[Error] Can't found pretrained mode, please check!")

load pretrained model successful!


  lprnet.load_state_dict(torch.load(args.pretrained_model, map_location=torch.device('cpu')))


## Quantization

In [9]:
lprnet.eval()
lprnet.qconfig = torch.ao.quantization.get_default_qconfig('x86')
lprnet.container.qconfig = None
lprnet_with_quant = torch.ao.quantization.prepare(lprnet)
# Verify the structure
lprnet_with_quant



LPRNet(
  (backbone): Sequential(
    (0): Conv2d(
      3, 64, kernel_size=(3, 3), stride=(1, 1)
      (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
    )
    (1): BatchNorm2d(
      64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
    )
    (2): ReLU()
    (3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=0, dilation=1, ceil_mode=False)
    (4): small_basic_block(
      (block): Sequential(
        (0): Conv2d(
          64, 32, kernel_size=(1, 1), stride=(1, 1)
          (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
        )
        (1): ReLU()
        (2): Conv2d(
          32, 32, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)
          (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
        )
        (3): ReLU()
        (4): Conv2d(
          32, 32, kernel_size=(1, 3), stride=(1, 1), padding=

In [10]:
def collate_fn(batch):
    imgs = []
    labels = []
    lengths = []
    for _, sample in enumerate(batch):
        img, label, length = sample
        imgs.append(torch.from_numpy(img))
        labels.extend(label)
        lengths.append(length)
    labels = np.asarray(labels).flatten().astype(np.float32)

    return (torch.stack(imgs, 0), torch.from_numpy(labels), lengths)

In [11]:
def Greedy_Decode_Eval(Net, datasets, args):
    # TestNet = Net.eval()
    epoch_size = len(datasets) // args.test_batch_size
    batch_iterator = iter(DataLoader(datasets, args.test_batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=collate_fn))

    Tp = 0
    Tn_1 = 0
    Tn_2 = 0
    t1 = time.time()
    for i in range(epoch_size):
        # load train data
        images, labels, lengths = next(batch_iterator)
        start = 0
        targets = []
        for length in lengths:
            label = labels[start:start+length]
            targets.append(label)
            start += length
        targets = np.array([el.numpy() for el in targets])
        imgs = images.numpy().copy()
        print(imgs.shape)

        if args.cuda:
            images = Variable(images.cuda())
        else:
            images = Variable(images)

        # forward
        prebs = Net(images)
        # greedy decode
        prebs = prebs.cpu().detach().numpy()
        preb_labels = list()
        for i in range(prebs.shape[0]):
            preb = prebs[i, :, :]
            preb_label = list()
            for j in range(preb.shape[1]):
                preb_label.append(np.argmax(preb[:, j], axis=0))
            no_repeat_blank_label = list()
            pre_c = preb_label[0]
            if pre_c != len(CHARS) - 1:
                no_repeat_blank_label.append(pre_c)
            for c in preb_label: # dropout repeate label and blank label
                if (pre_c == c) or (c == len(CHARS) - 1):
                    if c == len(CHARS) - 1:
                        pre_c = c
                    continue
                no_repeat_blank_label.append(c)
                pre_c = c
            preb_labels.append(no_repeat_blank_label)
        for i, label in enumerate(preb_labels):
            # show image and its predict label
            # if args.show:
            #     show(imgs[i], label, targets[i])
            if len(label) != len(targets[i]):
                Tn_1 += 1
                continue
            if (np.asarray(targets[i]) == np.asarray(label)).all():
                Tp += 1
            else:
                Tn_2 += 1
    Acc = Tp * 1.0 / (Tp + Tn_1 + Tn_2)
    print("[Info] Test Accuracy: {} [{}:{}:{}:{}]".format(Acc, Tp, Tn_1, Tn_2, (Tp+Tn_1+Tn_2)))
    t2 = time.time()
    print("[Info] Test Speed: {}s 1/{}]".format((t2 - t1) / len(datasets), len(datasets)))


In [12]:
def test(model):
    test_img_dirs = os.path.expanduser(args.test_img_dirs)
    test_dataset = LPRDataLoader(test_img_dirs.split(','), args.img_size, args.lpr_max_len)
    Greedy_Decode_Eval(model, test_dataset, args)

In [13]:
test(lprnet_with_quant)



(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
[Info] Test Accuracy: 0.895 [895:70:35:1000]
[Info] Test Speed: 0.05241087055206299s 1/1000]


In [14]:
lprnet_with_quant

LPRNet(
  (backbone): Sequential(
    (0): Conv2d(
      3, 64, kernel_size=(3, 3), stride=(1, 1)
      (activation_post_process): HistogramObserver(min_val=-24.24433135986328, max_val=32.877845764160156)
    )
    (1): BatchNorm2d(
      64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (activation_post_process): HistogramObserver(min_val=-53.673240661621094, max_val=38.9561767578125)
    )
    (2): ReLU()
    (3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=0, dilation=1, ceil_mode=False)
    (4): small_basic_block(
      (block): Sequential(
        (0): Conv2d(
          64, 32, kernel_size=(1, 1), stride=(1, 1)
          (activation_post_process): HistogramObserver(min_val=-36.72121047973633, max_val=32.40449142456055)
        )
        (1): ReLU()
        (2): Conv2d(
          32, 32, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0)
          (activation_post_process): HistogramObserver(min_val=-78.62523651123047, max_val=66.64794921875)
  

In [15]:
lprnet_with_quant = torch.ao.quantization.convert(lprnet_with_quant)

In [16]:
print(f'Check statistics of the various layers')
lprnet_with_quant

Check statistics of the various layers


LPRNet(
  (backbone): Sequential(
    (0): QuantizedConv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.3160325884819031, zero_point=56)
    (1): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=0, dilation=1, ceil_mode=False)
    (4): small_basic_block(
      (block): Sequential(
        (0): QuantizedConv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), scale=0.4220426678657532, zero_point=71)
        (1): ReLU()
        (2): QuantizedConv2d(32, 32, kernel_size=(3, 1), stride=(1, 1), scale=0.8657320141792297, zero_point=70, padding=(1, 0))
        (3): ReLU()
        (4): QuantizedConv2d(32, 32, kernel_size=(1, 3), stride=(1, 1), scale=2.7328438758850098, zero_point=42, padding=(0, 1))
        (5): ReLU()
        (6): QuantizedConv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), scale=3.644397020339966, zero_point=65)
      )
    )
    (5): QuantizedBatchNorm2d(1

In [17]:
# Print the weights matrix of the model after quantization
print('Weights after quantization')
print(torch.int_repr(lprnet_with_quant.backbone[0].weight()[0][0]))

Weights after quantization
tensor([[ 83, 127,  27],
        [ 40,  27,  17],
        [ 20,  37, -11]], dtype=torch.int8)


In [18]:
print('Original weights: ')
print(lprnet.backbone[0].weight[0][0])
print('')
print(f'Dequantized weights: ')
print(torch.dequantize(lprnet_with_quant.backbone[0].weight()[0][0]))
print('')

Original weights: 
tensor([[ 0.6627,  1.0154,  0.2187],
        [ 0.3205,  0.2122,  0.1328],
        [ 0.1622,  0.2951, -0.0841]], grad_fn=<SelectBackward0>)

Dequantized weights: 
tensor([[ 0.6610,  1.0115,  0.2150],
        [ 0.3186,  0.2150,  0.1354],
        [ 0.1593,  0.2947, -0.0876]])



In [19]:
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp_delme.p")
    print('Size (KB):', os.path.getsize("temp_delme.p")/1e3)
    os.remove('temp_delme.p')

In [20]:
print('Size of the model before quantization')
print_size_of_model(lprnet)
print('Size of the model after quantization')
print_size_of_model(lprnet_with_quant)

Size of the model before quantization
Size (KB): 1816.802
Size of the model after quantization
Size (KB): 637.382


In [21]:
print('Testing the model after quantization')
test(lprnet_with_quant)

Testing the model after quantization
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
(100, 3, 24, 94)
in forward of lprnet
[Info] Test Accuracy: 0.844 [844:94:62:1000]
[Info] Test Speed: 0.025954334020614624s 1/1000]


## Save Quantized Weights

In [22]:
torch.save(lprnet_with_quant.state_dict(), './weights/lprnet_ptsq_and_pruning_weights.pth')
