# ResNet kvantizácia

In [1]:
import torch
import torchvision
import distiller
from distiller.models import create_model


# Record the library versions this notebook was executed with.
print(torch.__version__, torchvision.__version__, sep='\n')

1.3.1
0.4.2


In [2]:
# Pretrained ResNet-50 for ImageNet, built through Distiller's model factory.
model = create_model(arch='resnet50', dataset='imagenet', pretrained=True)

In [3]:
# Show the module tree of the loaded model as the cell output.
model

DataParallel(
  (module): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): DistillerBottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=

In [4]:
from torchvision import transforms
# from torch.utils.data import DataLoader

# Per-channel mean/std normalisation applied after the image is converted to a tensor.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
preprocessing = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

batch_size = 20
num_workers = 1
dataset = torchvision.datasets.ImageFolder('/home/bohumil/FIIT/BP/BP/Zdroje_kod/imagenet/train',
                                           preprocessing)

# Seed the global RNG so that the random subset and the shuffle order are the same
# after every "Restart & Run All"; without it each run evaluates different images.
torch.manual_seed(0)

# Evaluate on at most 7000 images; clamp so the split also works on a smaller dataset.
eval_subset_size = min(7000, len(dataset))
small, big = torch.utils.data.random_split(
    dataset, [eval_subset_size, len(dataset) - eval_subset_size])

dataloader = torch.utils.data.DataLoader(small, batch_size=batch_size,
                                         num_workers=num_workers, shuffle=True)


In [5]:
from resnet_output import resnet_output

def target_labels(dataset, target):
    """Map dataset class indices to the values stored in `resnet_output`.

    Every index in `target` is first looked up in `dataset.classes` to obtain
    the class name, and that name is then looked up in `resnet_output`.

    Args:
        dataset: object exposing a `classes` sequence indexable by the values
            held in `target`.
        target: 1-D tensor of indices into `dataset.classes`.

    Returns:
        A `torch.LongTensor` with one mapped value per entry of `target`.

    Raises:
        IndexError / KeyError: if an index or class name cannot be resolved.
    """
    class_names = dataset.classes
    # Build a fresh list instead of shadowing the builtin `list` and mutating it by index.
    mapped = [resnet_output[class_names[idx]] for idx in target.tolist()]
    return torch.LongTensor(mapped)

In [6]:
# z <distiller_root>/jupyter/post_train_quant_convert_pytorch.ipynb
import torchnet as tnt
import math
import numpy as np


def eval_model(data_loader, model, device='cpu', print_freq=10):
    """Run `model` over `data_loader` and print running and final metrics.

    Top-1/top-5 accuracy and the mean cross-entropy loss are accumulated with
    torchnet meters. A progress line is printed every `print_freq` batches and
    a summary line (including exp(mean loss) as "PPL") at the end.

    The targets yielded by the loader are remapped with `target_labels` using
    the notebook-level `dataset` before they are compared with the output.

    Args:
        data_loader: loader yielding `(inputs, target)` batches; its `sampler`
            and `batch_size` are used to report the expected number of steps.
        model: module to evaluate; it is switched to eval mode.
        device: device the inputs and targets are moved to.
        print_freq: number of batches between progress lines.
    """
    loss_fn = torch.nn.CrossEntropyLoss().to(device)

    loss_meter = tnt.meter.AverageValueMeter()
    acc_meter = tnt.meter.ClassErrorMeter(accuracy=True, topk=(1, 5))

    n_samples = len(data_loader.sampler)
    per_batch = data_loader.batch_size
    n_steps = math.ceil(n_samples / per_batch)
    print('{0} samples ({1} per mini-batch)'.format(n_samples, per_batch))

    progress_fmt = '[{:3d}/{:3d}] Top1: {:.3f}  Top5: {:.3f}  Loss: {:.3f}'
    summary_fmt = 'Overall ==> Top1: {:.3f}  Top5: {:.3f}  Loss: {:.3f}  PPL: {:.3f}'

    # Inference only: eval mode and no gradient tracking.
    model.eval()
    with torch.no_grad():
        for batch_no, (inputs, target) in enumerate(data_loader, start=1):
            inputs = inputs.to(device)
            target = target.to(device)

            output = model(inputs)
            # Translate the loader's class indices into the model's label space.
            target = target_labels(dataset, target).to(device)

            loss_meter.add(loss_fn(output, target).item())
            acc_meter.add(output.data, target)

            if batch_no % print_freq == 0:
                print(progress_fmt.format(batch_no, n_steps, acc_meter.value(1),
                                          acc_meter.value(5), loss_meter.mean),
                      flush=True)

    print('----------')
    print(summary_fmt.format(acc_meter.value(1), acc_meter.value(5),
                             loss_meter.mean, np.exp(loss_meter.mean)),
          flush=True)

In [7]:
import logging
# `fileConfig` lives in the `logging.config` submodule, which a plain
# `import logging` does not load; import it explicitly.
import logging.config


def config_notebooks_logger():
    """Configure logging from `logging.conf` and return the root logger.

    The configuration file is read from the current working directory.

    Returns:
        The root `logging.Logger`, after an informational message was logged.
    """
    logging.config.fileConfig('logging.conf')
    msglogger = logging.getLogger()
    msglogger.info('Logging configured successfully')
    return msglogger

In [8]:
import argparse
import distiller

# Configure logging and keep the root logger for the messages emitted further down.
msglogger = config_notebooks_logger()

# Build an argument namespace that holds Distiller's post-training quantization
# options; parsing an empty list means nothing is read from the process's
# command line, so every option keeps its default.
parser = argparse.ArgumentParser()
distiller.quantization.add_post_train_quant_args(parser)
args = parser.parse_args(args= [])
# args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet50_imagenet_post_train.yaml'

Logging configured successfully


Correct way of getting statistics

In [9]:
# Copy of the model produced by Distiller's make_non_parallel_copy.
# NOTE(review): the calibration cell assigns `cpu_model` again, so this copy looks redundant — confirm.
cpu_model = distiller.make_non_parallel_copy(model)

In [14]:
from distiller.data_loggers import collect_quant_stats, QuantCalibrationStatsCollector, collector_context


# Fraction of the dataset that is run through the model to collect statistics.
args.qe_calibration = 0.2
if args.qe_calibration:
    # Work on a CPU copy of the model with fully-qualified layer names assigned.
    cpu_model = distiller.make_non_parallel_copy(model).cpu()
    distiller.utils.assign_layer_fq_names(cpu_model)

    msglogger.info("Generating quantization calibration stats based on {0} users".format(args.qe_calibration))
    collector = QuantCalibrationStatsCollector(cpu_model)

    # Random subset of the requested size, served in shuffled mini-batches.
    part = int(len(dataset) * args.qe_calibration)
    batch_size = 20
    num_workers = 1
    small, big = torch.utils.data.random_split(dataset, [part, len(dataset) - part])
    stat_loader = torch.utils.data.DataLoader(
        small,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=True,
    )

    # Forward passes executed inside the context are recorded by the collector.
    with collector_context(collector):
        eval_model(stat_loader, cpu_model, 'cpu', print_freq=30)

    yaml_path = './resnet50_quantization_stats.yaml'
    collector.save(yaml_path)

Generating quantization calibration stats based on 0.2 users


3887 samples (20 per mini-batch)
[ 30/195] Top1: 86.333  Top5: 97.500  Loss: 0.516
[ 60/195] Top1: 87.083  Top5: 97.750  Loss: 0.478
[ 90/195] Top1: 87.111  Top5: 97.722  Loss: 0.463
[120/195] Top1: 87.000  Top5: 97.667  Loss: 0.465
[150/195] Top1: 87.367  Top5: 97.533  Loss: 0.466
[180/195] Top1: 87.306  Top5: 97.417  Loss: 0.470
----------
Overall ==> Top1: 87.471  Top5: 97.453  Loss: 0.464  PPL: 1.591


In [12]:
# Timed reference evaluation of the original model on the GPU;
# skipped entirely when CUDA is not available.
if torch.cuda.is_available():
    %time eval_model(dataloader,model,'cuda', print_freq=40)

7000 samples (40 per mini-batch)
[ 40/175] Top1: 87.875  Top5: 97.500  Loss: 0.453
[ 80/175] Top1: 87.656  Top5: 97.500  Loss: 0.463
[120/175] Top1: 87.396  Top5: 97.667  Loss: 0.459
[160/175] Top1: 87.516  Top5: 97.703  Loss: 0.456
----------
Overall ==> Top1: 87.500  Top5: 97.643  Loss: 0.460  PPL: 1.583
CPU times: user 4min 54s, sys: 1.16 s, total: 4min 55s
Wall time: 4min 54s


In [10]:
# eval_quantized() does nothing unless this flag is truthy.
args.quantize_eval = True

In [11]:
from copy import deepcopy
def eval_quantized(model, args, data_loader=None, device=None, print_freq=30):
    """Quantize a copy of `model` post-training and evaluate it.

    Nothing happens when `args.quantize_eval` is falsy. Otherwise a deep copy
    of `model` is wrapped in a `PostTrainLinearQuantizer` built from `args`,
    prepared with a dummy input of `model.input_shape`, and passed to
    `eval_model`. The original `model` is left untouched.

    Args:
        model: model to quantize; must expose `input_shape`.
        args: namespace with the post-training quantization options.
        data_loader: loader to evaluate on; defaults to the notebook-level
            `dataloader`.
        device: device used for evaluation; defaults to 'cuda' when CUDA is
            available and 'cpu' otherwise, so the cell also runs on CPU-only
            machines.
        print_freq: number of batches between progress lines.
    """
    if not args.quantize_eval:
        return

    if data_loader is None:
        data_loader = dataloader
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    quantizer = distiller.quantization.PostTrainLinearQuantizer.from_args(deepcopy(model), args)
    dummy = distiller.get_dummy_input(input_shape=model.input_shape)
    quantizer.prepare_model(dummy)
    eval_model(data_loader, quantizer.model, device, print_freq=print_freq)

In [12]:
# Select the post-training quantization config file and evaluate the quantized model with it.
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


7000 samples (20 per mini-batch)
[ 30/350] Top1: 87.333  Top5: 97.833  Loss: 0.482
[ 60/350] Top1: 87.250  Top5: 96.833  Loss: 0.515
[ 90/350] Top1: 86.833  Top5: 97.056  Loss: 0.502
[120/350] Top1: 86.917  Top5: 97.333  Loss: 0.486
[150/350] Top1: 87.233  Top5: 97.433  Loss: 0.473
[180/350] Top1: 87.417  Top5: 97.417  Loss: 0.468
[210/350] Top1: 87.381  Top5: 97.262  Loss: 0.473
[240/350] Top1: 87.625  Top5: 97.417  Loss: 0.463
[270/350] Top1: 87.537  Top5: 97.389  Loss: 0.461
[300/350] Top1: 87.650  Top5: 97.517  Loss: 0.455
[330/350] Top1: 87.606  Top5: 97.530  Loss: 0.457
----------
Overall ==> Top1: 87.286  Top5: 97.471  Loss: 0.464  PPL: 1.591


4 bit quantization

## Run 1
```yaml
class: PostTrainLinearQuantizer
bits_activations: 4
bits_parameters: 4
bits_accum: 16
mode: ASYMMETRIC_UNSIGNED
per_channel_wts: True
clip_acts: AVG

```

In [13]:
# Run 1: switch to the 4-bit config file and evaluate again.
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit.yaml'
eval_quantized(model, args);

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


7000 samples (20 per mini-batch)
[ 30/350] Top1: 18.000  Top5: 36.667  Loss: 5.441
[ 60/350] Top1: 17.500  Top5: 36.333  Loss: 5.473
[ 90/350] Top1: 18.111  Top5: 36.444  Loss: 5.376
[120/350] Top1: 18.750  Top5: 36.708  Loss: 5.398
[150/350] Top1: 19.133  Top5: 36.433  Loss: 5.406
[180/350] Top1: 19.000  Top5: 36.222  Loss: 5.425
[210/350] Top1: 19.333  Top5: 36.381  Loss: 5.414
[240/350] Top1: 19.146  Top5: 36.375  Loss: 5.400
[270/350] Top1: 19.167  Top5: 36.500  Loss: 5.400
[300/350] Top1: 19.483  Top5: 36.983  Loss: 5.359
[330/350] Top1: 19.712  Top5: 37.333  Loss: 5.326
----------
Overall ==> Top1: 19.600  Top5: 37.086  Loss: 5.334  PPL: 207.168


## Úprava parametrov
## Run 2
/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit2.yaml

```yaml
quantizers:
  post_train_quantizer:
    class: PostTrainLinearQuantizer
    bits_activations: 4
    bits_parameters: 4
    bits_accum: 16

    mode: ASYMMETRIC_UNSIGNED
    
    model_activation_stats: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
    per_channel_wts: True
    clip_acts: AVG

    overrides:
      fc:
        clip_acts: NONE  # Don't clip activations in last layer before softmax
```

In [14]:
# Run 2: evaluate with the second 4-bit config file (`..._4bit2.yaml`).
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit2.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit2.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml
Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[

7000 samples (20 per mini-batch)


KeyboardInterrupt: 

In [None]:
```python
 overrides:
    # First and last layers in 8-bits
      conv1:
        bits_weights: 8
        bits_activations: 8
      fc:
        bits_weights: 8
        bits_activations: 8
        clip_acts: NONE  # Don't clip activations in last layer before softmax
```

In [None]:
# Evaluate with the third 4-bit config file (`..._4bit3.yaml`).
# NOTE(review): presumably the variant with the 8-bit conv1/fc overrides — confirm against the file.
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit3.yaml'
eval_quantized(model, args)

## Mixed model

```yaml
quantizers:
  post_train_quantizer:
    class: PostTrainLinearQuantizer
    bits_activations: 8
    bits_parameters: 4
    bits_accum: 16

    mode: ASYMMETRIC_UNSIGNED
   
    model_activation_stats: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
    per_channel_wts: True
    clip_acts: AVG

    overrides:
    # First and last layers in 8-bits
      conv1:
        bits_weights: 8
        bits_activations: 8
      fc:
        bits_weights: 8
        bits_activations: 8
        clip_acts: NONE  # Don't clip activations in last layer before softmax
```

In [None]:
# Evaluate with the fourth config file (`..._4bit4.yaml`).
# NOTE(review): presumably the mixed 8-bit-activation / 4-bit-weight configuration — confirm against the file.
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit4.yaml'
eval_quantized(model, args)

In [None]:
class AverageMeter(object):
    """Running tracker of the latest value and the weighted mean of a metric.

    Attributes (all reset to 0 by `reset`):
        val: the value passed to the most recent `update`.
        sum: accumulated `val * n` over all updates.
        count: accumulated `n` over all updates.
        avg: `sum / count` after the most recent update.
    """

    def __init__(self, name, fmt=':f'):
        # `fmt` is a format-spec suffix (including the colon) applied to val and avg in __str__.
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        """Zero the current value and all accumulated statistics."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as the latest value, weighted as `n` observations."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

    def __str__(self):
        # Yields e.g. "{name} {val:6.2f} ({avg:6.2f})", then fills it from the instance attributes.
        template = '{{name}} {{val{0}}} ({{avg{0}}})'.format(self.fmt)
        return template.format(**vars(self))


def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy, in percent, for each k in `topk`.

    Args:
        output: 2-D tensor of per-class scores, one row per sample.
        target: 1-D tensor with the expected class index of every sample.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per entry of `topk`, holding the
        percentage of samples whose target is among the k highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk best classes per sample, transposed to (maxk, batch).
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # `correct` inherits the transposed (non-contiguous) layout, so a slice of
            # more than one row cannot be flattened with view(); reshape() copies when needed.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


def evaluate(model, criterion, data_loader, neval_batches):
    """Measure top-1/top-5 accuracy of `model` on the first batches of a loader.

    The model is put into eval mode and run without gradient tracking. One dot
    is printed per processed batch. Processing stops once `neval_batches`
    batches were seen or the loader is exhausted, whichever comes first.

    Args:
        model: callable module producing per-class scores for a batch.
        criterion: loss callable; it is invoked on every batch but its result
            is not used.
        data_loader: iterable of `(image, target)` batches.
        neval_batches: maximum number of batches to process.

    Returns:
        Tuple `(top1, top5)` of `AverageMeter` objects.
    """
    model.eval()
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')

    with torch.no_grad():
        for batches_done, (image, target) in enumerate(data_loader, start=1):
            output = model(image)
            criterion(output, target)  # evaluated as in the original; value is discarded
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            print('.', end = '')

            n_images = image.size(0)
            top1.update(acc1[0], n_images)
            top5.update(acc5[0], n_images)

            if batches_done >= neval_batches:
                break

    return top1, top5

In [None]:
# Number of batches to evaluate. The call below previously hardcoded 20 while this
# variable was 2, so the reported image count did not match what was evaluated.
num_eval_batches = 20
# NOTE(review): `train_loader_gpu` is not defined in any visible cell — confirm which loader is intended.
top1, top5 = evaluate(distiller.make_non_parallel_copy(model).cpu(), torch.nn.CrossEntropyLoss(),
                      train_loader_gpu, neval_batches=num_eval_batches)
# Report the number of images the meter actually accumulated rather than an
# estimate from a batch size (`batch_size_gpu` was never defined, and the last
# batch may be smaller than the others).
print('Evaluation accuracy on %d images, %2.2f'%(top1.count, top1.avg))