### 0. Preliminary
Import necessary modules

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import os
import time
import sys
import torch.quantization

# Specify random seed for repeatable results
torch.manual_seed(191009)

# modify the code if you are using multiply GPUs
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

### 1. Model architecture
This section is used to define MobileNetv2 model and there is a fuse_model function for fusing layers (used for torch quantization).
Several notable modifications to enable torch quantization (it is fine if you do not use torch quantization):
- Replacing addition with quantized. FloatFunctional
- Insert QuantStub and DeQuantStub ar the beginning and the end of the network
- Replace ReLU6 with ReLU

In [2]:
from torch.quantization import QuantStub, DeQuantStub

def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class ConvBNReLU(nn.Sequential):
    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        padding = (kernel_size - 1) // 2
        super(ConvBNReLU, self).__init__(
            nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_planes, momentum=0.1),
            # Replace with ReLU
            nn.ReLU(inplace=False)
        )


class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, expand_ratio, torch_quan=False):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        self.torch_quan = torch_quan
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            # pw
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
        layers.extend([
            # dw
            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
            # pw-linear
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup, momentum=0.1),
        ])
        self.conv = nn.Sequential(*layers)
        # Replace torch.add with floatfunctional
        self.skip_add = nn.quantized.FloatFunctional()

    def forward(self, x):
        if self.use_res_connect:
            if self.torch_quan:
                return self.skip_add.add(x, self.conv(x))
            else:
                return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV2(nn.Module):
    def __init__(self, num_classes=1000, width_mult=1.0, inverted_residual_setting=None, round_nearest=8, torch_quan=False):
        """
        MobileNet V2 main class

        Args:
            num_classes (int): Number of classes
            width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
            inverted_residual_setting: Network structure
            round_nearest (int): Round the number of channels in each layer to be a multiple of this number
            Set to 1 to turn off rounding
        """
        super(MobileNetV2, self).__init__()
        self.torch_quan = torch_quan
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280

        if inverted_residual_setting is None:
            inverted_residual_setting = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        # only check the first element, assuming user knows t,c,n,s are required
        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
            raise ValueError("inverted_residual_setting should be non-empty "
                             "or a 4-element list, got {}".format(inverted_residual_setting))

        # building first layer
        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
        self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
        features = [ConvBNReLU(3, input_channel, stride=2)]
        # building inverted residual blocks
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t, torch_quan=self.torch_quan))
                input_channel = output_channel
        # building last several layers
        features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
        # make it nn.Sequential
        self.features = nn.Sequential(*features)
        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, num_classes),
        )
        """there are static quantize"""
        self.quant = QuantStub()
        self.dequant = DeQuantStub()

        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        if self.torch_quan:
            x = self.quant(x)
        x = self.features(x)
        x = x.mean([2, 3]) # maybe this function as [average pool + flatten]
        # x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1)
        x = self.classifier(x)
        if self.torch_quan:
            x = self.dequant(x)
        return x

    # Fuse Conv+BN and Conv+BN+Relu modules prior to quantization
    # This operation does not change the numerics
    def fuse_model(self):
        for m in self.modules():
            if type(m) == ConvBNReLU:
                torch.quantization.fuse_modules(m, ['0', '1', '2'], inplace=True)
            if type(m) == InvertedResidual:
                for idx in range(len(m.conv)):
                    if type(m.conv[idx]) == nn.Conv2d:
                        torch.quantization.fuse_modules(m.conv, [str(idx), str(idx + 1)], inplace=True)


### 2. Helper functions
Define several helper functions to help with model evaluation.

In [3]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

    
def evaluate(model, criterion, data_loader, device=torch.device('cpu')):
    model.eval()
    model.to(device)
    
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    cnt = 0

    with torch.no_grad():
        for image, target in data_loader:
            image = image.to(device)
            target = target.to(device)
            
            output = model(image)
            loss = criterion(output, target)
            cnt += 1
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            print('.', end = '')
            top1.update(acc1[0], image.size(0))
            top5.update(acc5[0], image.size(0))

    return top1, top5


def load_model(model_file, torch_quan=True):
    model = MobileNetV2(torch_quan=torch_quan)
    state_dict = torch.load(model_file)
    model.load_state_dict(state_dict)
    model.to('cpu')
    return model


def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

### 3. Define dataset and data loaders
The dataset used for training and testing is [ImageNet 1000(mini)](https://www.kaggle.com/ifigotin/imagenetmini-1000). With the data downloaded, you can use prepare_data_loaders to read in this data.

In [4]:
def prepare_data_loaders(data_path):
    traindir = os.path.join(data_path, 'train')
    valdir = os.path.join(data_path, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    dataset = torchvision.datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    print("dataset_train : %d" % (len(dataset)))

    dataset_test = torchvision.datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))
    print("dataset_test : %d" % (len(dataset_test)))
    num_test = len(dataset_test)

    train_sampler = torch.utils.data.RandomSampler(dataset)
    test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=train_batch_size,
        sampler=train_sampler)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=eval_batch_size,
        sampler=test_sampler)

    return data_loader, data_loader_test, num_test

In [5]:
data_path = 'imagenet-mini'
train_batch_size = 30
eval_batch_size = 30

data_loader, data_loader_test, num_test = prepare_data_loaders(data_path)

dataset_train : 34745
dataset_test : 3923


### 4. Load pretrained model and fuse model for torch quantization.
The [pretrained model](https://download.pytorch.org/models/mobilenet_v2-b0353104.pth) is from torchvision.

In [6]:
saved_model_dir = 'model/'
float_model_file = 'mobilenet_v2-b0353104.pth'
scripted_float_model_file = 'mobilenet_quantization_scripted.pth'
scripted_quantized_model_file = 'mobilenet_quantization_scripted_quantized.pth'

criterion = nn.CrossEntropyLoss()

In [7]:
float_model = load_model(saved_model_dir + float_model_file).to('cpu')

# Next, we’ll “fuse modules”; this can both make the model faster by saving on memory access
# while also improving numerical accuracy.
# While this can be used with any model, this is especially common with quantized models.
print('\n Inverted Residual Block: Before fusion \n\n', float_model.features[1].conv)
float_model.eval()

# Fuses modules
float_model.fuse_model()

# Note fusion of Conv+BN+Relu and Conv+Relu
print('\n Inverted Residual Block: After fusion\n\n',float_model.features[1].conv)

print("Size of baseline model")
print_size_of_model(float_model)

top1, top5 = evaluate(float_model, criterion, data_loader_test, torch.device(device))
print('Evaluation accuracy on %d images, %2.2f'%(num_test, top1.avg))
torch.jit.save(torch.jit.script(float_model), saved_model_dir + scripted_float_model_file)

del float_model


 Inverted Residual Block: Before fusion 

 Sequential(
  (0): ConvBNReLU(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

 Inverted Residual Block: After fusion

 Sequential(
  (0): ConvBNReLU(
    (0): ConvReLU2d(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
      (1): ReLU()
    )
    (1): Identity()
    (2): Identity()
  )
  (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1))
  (2): Identity()
)
Size of baseline model
Size (MB): 13.999657
...................................................................................................................................Evaluation accuracy on 3923 images, 71.63


### 5. Use torch quantization to do the post-training static quantization
In post-training quantization, model just be converted from float to int.

In [8]:
myModel = load_model(saved_model_dir + float_model_file).to('cpu')
myModel.eval()

# Fuse Conv, bn and relu
myModel.fuse_model()

# Specify quantization configuration
# Start with simple min/max range estimation and per-tensor quantization of weights
myModel.qconfig = torch.quantization.get_default_qconfig('fbgemm')
print(myModel.qconfig)
torch.quantization.prepare(myModel, inplace=True)

# Calibrate with the training set
print('Post Training Quantization Prepare: Inserting Observers')
evaluate(myModel, criterion, data_loader_test, torch.device(device))
print('Post Training Quantization: Calibration done')

# Convert to quantized model
myModel.to('cpu')
torch.quantization.convert(myModel, inplace=True)
print('Post Training Quantization: Convert done')

print("Size of model after quantization")
print_size_of_model(myModel)

top1, top5 = evaluate(myModel, criterion, data_loader_test)
print('Evaluation accuracy on %d images, %2.2f'%(num_test, top1.avg))
torch.jit.save(torch.jit.script(myModel), saved_model_dir + scripted_quantized_model_file)

del myModel

QConfig(activation=functools.partial(<class 'torch.quantization.observer.HistogramObserver'>, reduce_range=True), weight=functools.partial(<class 'torch.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric))
Post Training Quantization Prepare: Inserting Observers




...................................................................................................................................Post Training Quantization: Calibration done




Post Training Quantization: Convert done
Size of model after quantization
Size (MB): 3.950561
...................................................................................................................................Evaluation accuracy on 3923 images, 67.17


### 6. Use torch quantization to do the Quantization-aware training
Quantization-aware training (QAT) is the quantization method that typically results in the highest accuracy. With QAT, all weights and activations are “fake quantized” during both the forward and backward passes of training: that is, float values are rounded to mimic int8 values, but all computations are still done with floating point numbers. Thus, all the weight adjustments during training are made while “aware” of the fact that the model will ultimately be quantized; after quantizing, therefore, this method will usually yield higher accuracy than either dynamic quantization or post-training static quantization.

In [9]:
def train_one_epoch(model, criterion, optimizer, data_loader, device, ntrain_batches):
    model.train() # must be in train mode
    model.to(device)
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    avgloss = AverageMeter('Loss', '1.5f')
    cnt = 0

    for image, target in data_loader:
        start_time = time.time()
        print('.', end = '')
        cnt += 1
        image, target = image.to(device), target.to(device)
        output = model(image)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        top1.update(acc1[0], image.size(0))
        top5.update(acc5[0], image.size(0))
        avgloss.update(loss, image.size(0))
        if cnt >= ntrain_batches:
            print('Loss', avgloss.avg)

            print('Training: * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
                  .format(top1=top1, top5=top5))
            return

    print('Full imagenet train set:  * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}'
          .format(top1=top1, top5=top5))
    return

In [10]:
qat_model = load_model(saved_model_dir + float_model_file)
qat_model.fuse_model()

qat_model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
torch.quantization.prepare_qat(qat_model, inplace=True)

optimizer = torch.optim.SGD(qat_model.parameters(), lr = 0.0001)
num_train_batches = 20
num_epochs = 8

# Train and check accuracy after each epoch
for nepoch in range(num_epochs):
    train_one_epoch(qat_model, criterion, optimizer, data_loader, torch.device(device), num_train_batches)
    if nepoch > 3:
        # Freeze quantizer parameters
        qat_model.apply(torch.quantization.disable_observer)
    if nepoch > 2:
        # Freeze batch norm mean and variance estimates
        qat_model.apply(torch.nn.intrinsic.qat.freeze_bn_stats)

    # Check the accuracy after each epoch
    qat_model.to('cpu')
    quantized_model = torch.quantization.convert(qat_model.eval(), inplace=False)
    quantized_model.eval()
    top1, top5 = evaluate(quantized_model, criterion, data_loader_test)
    print('Epoch %d :Evaluation accuracy on %d images, %2.2f'%(nepoch, num_test, top1.avg))
    
del qat_model

....................Loss tensor(1.7438, device='cuda:0', grad_fn=<DivBackward0>)
Training: * Acc@1 60.167 Acc@5 81.000




...................................................................................................................................Epoch 0 :Evaluation accuracy on 3923 images, 67.30
....................Loss tensor(1.5380, device='cuda:0', grad_fn=<DivBackward0>)
Training: * Acc@1 65.000 Acc@5 84.500
...................................................................................................................................Epoch 1 :Evaluation accuracy on 3923 images, 66.81
....................Loss tensor(1.7409, device='cuda:0', grad_fn=<DivBackward0>)
Training: * Acc@1 61.333 Acc@5 81.167
...................................................................................................................................Epoch 2 :Evaluation accuracy on 3923 images, 67.22
....................Loss tensor(1.7945, device='cuda:0', grad_fn=<DivBackward0>)
Training: * Acc@1 57.500 Acc@5 81.333
.................................................................................................

### 7. Speedup from torch quantization
Test whether quantized model actually perform inference faster.

In [11]:
def run_benchmark(model_file, img_loader, device=torch.device('cpu')):
    elapsed = 0
    model = torch.jit.load(model_file)
    model.eval()
    model.to(device)
    num_batches = 50
    # Run the scripted model on a few batches of images
    for i, (images, target) in enumerate(img_loader):
        images, target = images.to(device), target.to(device)
        if i < num_batches:
            start = time.time()
            output = model(images)
            end = time.time()
            elapsed = elapsed + (end-start)
        else:
            break
    num_images = images.size()[0] * num_batches

    print('Elapsed time: %3.0f ms' % (elapsed/num_images*1000))
    return elapsed


run_benchmark(saved_model_dir + scripted_float_model_file, data_loader_test)
run_benchmark(saved_model_dir + scripted_quantized_model_file, data_loader_test)

Elapsed time: 289 ms
Elapsed time:  26 ms


39.52912259101868

### 8. Quantize the model with self-defined methods
There are many methods for QAT and torch quantization is mainly referred to this [article](https://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf). Therefore, you also define the other quantization algorithm you like to perform QAT.

This section use [LSQ](https://deepai.org/publication/learned-step-size-quantization) as the example.

In [12]:
class LSQ_Conv2D(nn.Conv2d):
    def __init__(self, m: nn.Conv2d, bits, weight_quantizer, act_quantizer):
        super(LSQ_Conv2D, self).__init__(
            in_channels=m.in_channels,
            out_channels=m.out_channels,
            kernel_size=m.kernel_size,
            stride=m.stride,
            padding=m.padding,
            dilation=m.dilation,
            groups=m.groups,
            bias=True if m.bias is not None else False,
            padding_mode=m.padding_mode
        )

        self.weight = nn.Parameter(m.weight.detach())
        self.bits = bits

        self.weight_quantizer = weight_quantizer
        self.weight_quantizer.init_step_size(m.weight)

        self.act_quantizer = act_quantizer

    def forward(self, x):
        quantized_weight = self.weight_quantizer(self.weight)

        quantized_act = self.act_quantizer(x)

        # quantized_act = x

        return F.conv2d(quantized_act, quantized_weight, self.bias, 
                        self.stride, self.padding, self.dilation, self.groups)


class LSQ_Linear(nn.Linear):
    def __init__(self, m: nn.Conv2d, bits, weight_quantizer, act_quantizer):
        super(LSQ_Linear, self).__init__(
            in_features=m.in_features,
            out_features=m.out_features,
            bias=True if m.bias is not None else False)

        self.weight = nn.Parameter(m.weight.detach())
        self.bits = bits
        self.weight_quantizer = weight_quantizer

        self.weight_quantizer.init_step_size(m.weight)
        self.act_quantizer = act_quantizer

    def forward(self, x):
        quantized_weight = self.weight_quantizer(self.weight)

        quantized_act = self.act_quantizer(x)

        # quantized_act = x

        return F.linear(quantized_act, quantized_weight, self.bias)


QuanModuleMapping = {
    nn.Conv2d: LSQ_Conv2D,
    nn.Linear: LSQ_Linear
}

In [13]:
class LSQ_Quantizer(nn.Module):
    def __init__(self, bits, is_activation=False):
        super(LSQ_Quantizer, self).__init__()

        self.bits = bits

        if(is_activation):
            self.Qn = 0
            self.Qp = 2 ** bits - 1
        else:
            self.Qn = -2**(bits - 1)
            self.Qp = 2 ** (bits - 1) - 1

        self.s = nn.Parameter(torch.Tensor([1.0]))

    def init_step_size(self, x):
        self.s = nn.Parameter(
            x.detach().abs().mean() * 2 / (self.Qp) ** 0.5)

    def grad_scale(self, x, scale):
        y_out = x
        y_grad = x * scale

        y = y_out.detach() - y_grad.detach() + y_grad

        return y

    def round_pass(self, x):
        y_out = x.round()
        y_grad = x
        y = y_out.detach() - y_grad.detach() + y_grad

        return y

    def forward(self, x):
        scale_factor = 1 / (x.numel() * self.Qp) ** 0.5

        scale = self.grad_scale(self.s, scale_factor)
        x = x / scale
        x = x.clamp(self.Qn, self.Qp)

        x_bar = self.round_pass(x)

        x_hat = x_bar * scale

        return x_hat

In [14]:
def quantize_model(model, bits=8):
    def helper(child: nn.Module):
        for n, c in child.named_children():
            if type(c) in QuanModuleMapping.keys():
                for full_name, m in model.named_modules():
                    if c is m:
                        child.add_module(n, modules_to_replace.pop(full_name))
                        break
            else:
                helper(c)
    
    modules_to_replace = dict()
    for name, module in model.named_modules():
        if type(module) in QuanModuleMapping.keys():
            modules_to_replace[name] = QuanModuleMapping[type(module)](
                module,
                bits,
                LSQ_Quantizer(bits, False),
                LSQ_Quantizer(bits, True)
            )

    helper(model)
    return model

In [15]:
lsq_model = load_model(saved_model_dir + float_model_file, False)
lsq_model = quantize_model(lsq_model)

optimizer = torch.optim.SGD(lsq_model.parameters(), lr = 0.003, momentum=0.9)
num_train_batches = 500
num_epochs = 8

# Train and check accuracy after each epoch
for nepoch in range(num_epochs):
    train_one_epoch(lsq_model, criterion, optimizer, data_loader, torch.device(device), num_train_batches)

    # Check the accuracy after each epoch
    top1, top5 = evaluate(lsq_model, criterion, data_loader_test, torch.device(device))
    print('Epoch %d :Evaluation accuracy on %d images, %2.2f'%(nepoch, num_test, top1.avg))
    
del lsq_model

....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................Loss tensor(7.1064, device='cuda:0', grad_fn=<DivBackward0>)
Training: * Acc@1 0.187 Acc@5 0.787
...................................................................................................................................Epoch 0 :Evaluation accuracy on 3923 images, 0.18
..............................................................................................................................................................................................................................

### 9. Project Description
In this project, you need to quantize a Yolo network to do the objection detection task.
- The DL framework should be Pytorch
- Requires dataset is coco2017
- The bit number of quantized model weight is up to 8

### Reference:
- [MobileNetV2 implementation and helper functions](https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv2.py)
- [The usage of torch quantization](https://pytorch.org/tutorials/advanced/static_quantization_tutorial.html)
- [Learnable Step Size Quantization(LSQ) implementation](https://github.com/Kelvinyu1117/LSQ-implementation)