# ResNet kvantizácia


## Obsah
*   [Predspracovanie](#predspracovanie)
<a href='#predspracovanie'> </a>
*   [Štatistiky pre statickú kvantizáciu](#stats)
<a href='#stats'> </a>
*   [Base precision](#base)
<a href='#base'> </a>
*   [8-bitová kvantizácia](#8bit)
<a href='#8bit'> </a>
*   [4-bitová kvantizácia](#4bit)
<a href='#4bit'> </a>

In [1]:
import torch
import torchvision
import distiller
from distiller.models import create_model


# Record the library versions this notebook was run with, so the results
# below can be tied to a concrete environment.
print(torch.__version__)
print(torchvision.__version__)

1.3.1
0.4.2


<a id='predspracovanie'> </a>

# Predspracovanie

In [2]:
# Build a pretrained ResNet-50 for ImageNet through Distiller's create_model factory.
model = create_model(pretrained=True,dataset='imagenet',arch='resnet50') 

In [3]:
# Display the module tree of the loaded model.
model

DataParallel(
  (module): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): DistillerBottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=

In [4]:
import numpy as np
from torchvision import transforms

# --- Tunable settings for this cell -----------------------------------------
# Root folder handed to torchvision's ImageFolder.
# NOTE(review): machine-specific absolute path -- adjust for your environment.
IMAGENET_VAL_DIR = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/imagenet/val'
# Number of images drawn at random for the evaluation subset.
EVAL_SUBSET_SIZE = 7000
# Seed applied to torch and numpy so the random split below is repeatable.
SEED = 0

# Per-channel normalisation followed by the resize / centre-crop pipeline that
# turns each image into a 224x224 tensor.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
preprocessing = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

# Seed before random_split so the same subset is selected on every run.
torch.manual_seed(SEED)
np.random.seed(SEED)

batch_size = 20
num_workers = 1
dataset = torchvision.datasets.ImageFolder(IMAGENET_VAL_DIR, preprocessing)

# `small` is the evaluation subset; `big` holds the remaining images.
small, big = torch.utils.data.random_split(
    dataset, [EVAL_SUBSET_SIZE, len(dataset) - EVAL_SUBSET_SIZE])

dataloader = torch.utils.data.DataLoader(small,batch_size=batch_size,
                                         num_workers=num_workers,shuffle=True)


In [5]:
from resnet_output import resnet_output

def target_labels(dataset, target, label_map=None):
    """Translate dataset class indices into the label ids looked up in ``label_map``.

    Every index in ``target`` is first resolved to a class name through
    ``dataset.classes``; that name is then used as the key into ``label_map``.

    Args:
        dataset: Object exposing a ``classes`` sequence that can be indexed by
            the values contained in ``target``.
        target: Tensor of integer class indices (one per sample).
        label_map: Lookup from class name to label id. When omitted, the
            ``resnet_output`` object imported by this notebook is used.

    Returns:
        A ``torch.LongTensor`` holding one remapped label per entry of ``target``.
    """
    if label_map is None:
        label_map = resnet_output
    class_names = dataset.classes
    # Build the result in one pass instead of overwriting a list in place
    # (the previous version also shadowed the builtin ``list``).
    return torch.LongTensor([label_map[class_names[idx]] for idx in target.tolist()])

In [6]:
# z <distiller_root>/jupyter/post_train_quant_convert_pytorch.ipynb
import torchnet as tnt
import math
import numpy as np


def eval_model(data_loader, model, device='cpu', print_freq=10):
    """Run ``model`` over ``data_loader`` and print top-1/top-5 accuracy and loss.

    The model is switched to evaluation mode and executed without gradient
    tracking. Only the inputs and labels are moved to ``device``; the model
    itself is not moved here, so it must already live on ``device``.

    Labels coming from the loader are remapped with ``target_labels`` using
    the notebook-level ``dataset`` object, so that global must exist before
    this function is called.

    Args:
        data_loader: Loader yielding ``(inputs, target)`` batches; its
            ``sampler`` length and ``batch_size`` are used for progress output.
        model: The network to evaluate.
        device: Device string/object the batches are sent to.
        print_freq: Print running metrics every ``print_freq`` batches.

    Returns:
        None. Metrics are printed; the final line also reports ``exp(mean loss)``
        as "PPL".
    """
    criterion = torch.nn.CrossEntropyLoss().to(device)

    loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True, topk=(1, 5))

    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    total_steps = math.ceil(total_samples / batch_size)
    print('{0} samples ({1} per mini-batch)'.format(total_samples, batch_size))

    # Switch to evaluation mode
    model.eval()

    with torch.no_grad():
        for step, (inputs, target) in enumerate(data_loader):
            inputs = inputs.to(device)
            # compute output from model
            output = model(inputs)
            # Remap the labels on the host and transfer them once; sending the
            # raw indices to the device first would only be undone by the
            # ``tolist()`` call inside target_labels.
            target = target_labels(dataset, target).to(device)
            # compute loss and measure accuracy
            loss.add(criterion(output, target).item())
            classerr.add(output.detach(), target)

            if (step + 1) % print_freq == 0:
                print('[{:3d}/{:3d}] Top1: {:.3f}  Top5: {:.3f}  Loss: {:.3f}'.format(
                      step + 1, total_steps, classerr.value(1), classerr.value(5), loss.mean), flush=True)
    print('----------')
    print('Overall ==> Top1: {:.3f}  Top5: {:.3f}  Loss: {:.3f}  PPL: {:.3f}'.format(
        classerr.value(1), classerr.value(5), loss.mean, np.exp(loss.mean)), flush=True)

In [7]:
import logging
# fileConfig lives in the ``logging.config`` submodule, which a plain
# ``import logging`` does not load; import it explicitly so this cell does not
# depend on some other library having imported it first.
import logging.config


def config_notebooks_logger():
    """Configure logging from ``logging.conf`` and return the root logger.

    The configuration file is read from the current working directory. After
    configuration a confirmation message is logged at INFO level.

    Returns:
        The root ``logging.Logger``.
    """
    logging.config.fileConfig('logging.conf')
    msglogger = logging.getLogger()
    msglogger.info('Logging configured successfully')
    return msglogger

In [8]:
import argparse
import distiller

# Configure logging and keep the root logger for the messages emitted below.
msglogger = config_notebooks_logger()

# Build a parser that carries Distiller's post-training-quantization options,
# then parse an empty argument list so `args` holds the parser defaults.
# Later cells overwrite individual fields on `args`.
parser = argparse.ArgumentParser()
distiller.quantization.add_post_train_quant_args(parser)
args = parser.parse_args(args= [])
# args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet50_imagenet_post_train.yaml'

Logging configured successfully


<a id='stats'> </a>

# Correct way of getting statistics


In [9]:
# Make a non-parallel copy of the model via Distiller.
# NOTE(review): the next cell assigns cpu_model again (with .cpu()), so this
# assignment looks redundant -- confirm before removing.
cpu_model = distiller.make_non_parallel_copy(model)

In [14]:
from distiller.data_loggers import collect_quant_stats, QuantCalibrationStatsCollector, collector_context


# Fraction of the dataset that is run through the model to collect
# activation statistics.
args.qe_calibration = 0.2
if args.qe_calibration:

    # Statistics are collected on a non-parallel copy placed on the CPU.
    cpu_model = distiller.make_non_parallel_copy(model).cpu()

    distiller.utils.assign_layer_fq_names(cpu_model)
    msglogger.info("Generating quantization calibration stats based on {0} of the dataset".format(args.qe_calibration))
    collector = QuantCalibrationStatsCollector(cpu_model)

    # Draw the calibration subset under its own names so the `small` / `big`
    # split used for evaluation is not overwritten. `batch_size` and
    # `num_workers` are reused from the data-loading cell.
    # NOTE(review): the subset is drawn from the full dataset, so it may
    # overlap with the images used for evaluation -- confirm this is intended.
    part = int(len(dataset) * args.qe_calibration)
    calib_subset, _calib_rest = torch.utils.data.random_split(
        dataset, [part, len(dataset) - part])
    stat_loader = torch.utils.data.DataLoader(calib_subset, batch_size=batch_size,
                                              num_workers=num_workers, shuffle=True)

    # Everything executed inside the context is observed by the collector.
    with collector_context(collector):
        eval_model(stat_loader, cpu_model, 'cpu', print_freq=30)
    yaml_path = './resnet50_quantization_stats.yaml'
    collector.save(yaml_path)

Generating quantization calibration stats based on 0.2 users


3887 samples (20 per mini-batch)
[ 30/195] Top1: 86.333  Top5: 97.500  Loss: 0.516
[ 60/195] Top1: 87.083  Top5: 97.750  Loss: 0.478
[ 90/195] Top1: 87.111  Top5: 97.722  Loss: 0.463
[120/195] Top1: 87.000  Top5: 97.667  Loss: 0.465
[150/195] Top1: 87.367  Top5: 97.533  Loss: 0.466
[180/195] Top1: 87.306  Top5: 97.417  Loss: 0.470
----------
Overall ==> Top1: 87.471  Top5: 97.453  Loss: 0.464  PPL: 1.591


<a id='base'> </a>

# Base precision

In [9]:
# Baseline: time an evaluation of the unquantized model on the GPU.
# Nothing is run (and nothing is printed) when CUDA is not available.
if torch.cuda.is_available():
    %time eval_model(dataloader,model,'cuda', print_freq=40)

7000 samples (20 per mini-batch)
[ 40/350] Top1: 87.250  Top5: 98.125  Loss: 0.471
[ 80/350] Top1: 86.188  Top5: 97.562  Loss: 0.501
[120/350] Top1: 86.458  Top5: 97.375  Loss: 0.490
[160/350] Top1: 87.344  Top5: 97.406  Loss: 0.472
[200/350] Top1: 87.000  Top5: 97.400  Loss: 0.479
[240/350] Top1: 87.146  Top5: 97.354  Loss: 0.470
[280/350] Top1: 86.929  Top5: 97.393  Loss: 0.475
[320/350] Top1: 86.922  Top5: 97.531  Loss: 0.475
----------
Overall ==> Top1: 87.043  Top5: 97.557  Loss: 0.470  PPL: 1.600
CPU times: user 5min 32s, sys: 1.16 s, total: 5min 33s
Wall time: 5min 31s


In [9]:
# Turn on the flag that eval_quantized() checks before it quantizes and evaluates.
args.quantize_eval = True

In [10]:
from copy import deepcopy


def eval_quantized(model, args, device=None):
    """Quantize a copy of ``model`` post-training and evaluate it.

    Does nothing unless ``args.quantize_eval`` is truthy. Otherwise a deep copy
    of ``model`` is handed to ``PostTrainLinearQuantizer.from_args``, prepared
    with a dummy input built from ``model.input_shape`` and evaluated with
    ``eval_model`` on the notebook-level ``dataloader``. The original ``model``
    is left untouched.

    Args:
        model: The model to quantize; must expose ``input_shape``.
        args: Namespace with ``quantize_eval`` and the quantizer options read
            by ``PostTrainLinearQuantizer.from_args``.
        device: Device passed to ``eval_model``. When ``None`` (the default),
            ``'cuda'`` is used if CUDA is available and ``'cpu'`` otherwise,
            mirroring the availability check of the baseline evaluation.

    Returns:
        None. Metrics are printed by ``eval_model``.
    """
    if not args.quantize_eval:
        return
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    quantizer = distiller.quantization.PostTrainLinearQuantizer.from_args(deepcopy(model), args)
    dummy = distiller.get_dummy_input(input_shape=model.input_shape)
    quantizer.prepare_model(dummy)
    eval_model(dataloader, quantizer.model, device, print_freq=30)

<a id='8bit'> </a>

# 8 bit quantization

In [12]:
# 8-bit run: select the quantizer configuration file, then quantize and evaluate.
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train.yaml'
eval_quantized(model, args)


Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


7000 samples (20 per mini-batch)
[ 30/350] Top1: 86.167  Top5: 97.500  Loss: 0.459
[ 60/350] Top1: 86.000  Top5: 97.417  Loss: 0.465
[ 90/350] Top1: 86.389  Top5: 97.389  Loss: 0.475
[120/350] Top1: 86.625  Top5: 97.625  Loss: 0.466
[150/350] Top1: 86.633  Top5: 97.667  Loss: 0.467
[180/350] Top1: 86.639  Top5: 97.583  Loss: 0.473
[210/350] Top1: 86.595  Top5: 97.429  Loss: 0.478
[240/350] Top1: 86.646  Top5: 97.562  Loss: 0.475
[270/350] Top1: 86.870  Top5: 97.611  Loss: 0.470
[300/350] Top1: 86.833  Top5: 97.533  Loss: 0.471
[330/350] Top1: 86.864  Top5: 97.545  Loss: 0.472
----------
Overall ==> Top1: 86.829  Top5: 97.500  Loss: 0.473  PPL: 1.604


<a id='4bit'> </a>

# 4 bit quantization

## Run 1
```yaml
class: PostTrainLinearQuantizer
bits_activations: 4
bits_parameters: 4
bits_accum: 16
mode: ASYMMETRIC_UNSIGNED
per_channel_wts: True
clip_acts: AVG

```

In [11]:
# 4-bit, run 1: select the 4-bit configuration file, then quantize and evaluate.
# The trailing ';' suppresses the cell's return-value display.
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit.yaml'
eval_quantized(model, args);

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


7000 samples (20 per mini-batch)
[ 30/350] Top1: 20.667  Top5: 37.167  Loss: 5.362
[ 60/350] Top1: 18.667  Top5: 36.500  Loss: 5.449
[ 90/350] Top1: 19.167  Top5: 37.556  Loss: 5.379
[120/350] Top1: 19.958  Top5: 37.792  Loss: 5.331
[150/350] Top1: 19.467  Top5: 37.200  Loss: 5.348
[180/350] Top1: 18.944  Top5: 36.750  Loss: 5.397
[210/350] Top1: 19.000  Top5: 37.143  Loss: 5.360
[240/350] Top1: 19.063  Top5: 37.146  Loss: 5.353
[270/350] Top1: 19.593  Top5: 37.481  Loss: 5.322
[300/350] Top1: 19.583  Top5: 37.433  Loss: 5.330
[330/350] Top1: 19.576  Top5: 37.167  Loss: 5.345
----------
Overall ==> Top1: 19.657  Top5: 37.357  Loss: 5.334  PPL: 207.271


## Úprava parametrov
## Run 2
Konfiguračný súbor: `/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit2.yaml`

```yaml
quantizers:
  post_train_quantizer:
    class: PostTrainLinearQuantizer
    bits_activations: 4
    bits_parameters: 4
    bits_accum: 16

    mode: ASYMMETRIC_UNSIGNED
    
    model_activation_stats: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
    per_channel_wts: True
    clip_acts: AVG

    overrides:
      fc:
        clip_acts: NONE  # Don't clip activations in last layer before softmax
```

In [12]:
# 4-bit, run 2: switch to the second 4-bit configuration file and re-evaluate.
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit2.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit2.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


7000 samples (20 per mini-batch)
[ 30/350] Top1: 18.167  Top5: 34.500  Loss: 6.043
[ 60/350] Top1: 17.333  Top5: 34.917  Loss: 6.089
[ 90/350] Top1: 17.944  Top5: 35.611  Loss: 5.991
[120/350] Top1: 18.250  Top5: 35.542  Loss: 5.928
[150/350] Top1: 18.300  Top5: 35.533  Loss: 5.950
[180/350] Top1: 18.000  Top5: 35.750  Loss: 5.956
[210/350] Top1: 17.857  Top5: 35.833  Loss: 5.933
[240/350] Top1: 17.875  Top5: 35.604  Loss: 5.931
[270/350] Top1: 17.741  Top5: 35.500  Loss: 5.941
[300/350] Top1: 17.900  Top5: 35.750  Loss: 5.935
[330/350] Top1: 17.970  Top5: 35.803  Loss: 5.945
----------
Overall ==> Top1: 17.971  Top5: 35.629  Loss: 5.958  PPL: 386.699


```yaml
 overrides:
    # First and last layers in 8-bits
      conv1:
        bits_weights: 8
        bits_activations: 8
      fc:
        bits_weights: 8
        bits_activations: 8
        clip_acts: NONE  # Don't clip activations in last layer before softmax
```

In [13]:
# 4-bit, third configuration file (presumably the one with the overrides
# listed just above -- verify against the YAML file), then re-evaluate.
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit3.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit3.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


7000 samples (20 per mini-batch)
[ 30/350] Top1: 0.333  Top5: 0.667  Loss: 8.366
[ 60/350] Top1: 0.250  Top5: 0.750  Loss: 8.281
[ 90/350] Top1: 0.222  Top5: 0.667  Loss: 8.275
[120/350] Top1: 0.167  Top5: 0.625  Loss: 8.255
[150/350] Top1: 0.133  Top5: 0.633  Loss: 8.271
[180/350] Top1: 0.139  Top5: 0.583  Loss: 8.256
[210/350] Top1: 0.143  Top5: 0.619  Loss: 8.239
[240/350] Top1: 0.125  Top5: 0.583  Loss: 8.239
[270/350] Top1: 0.111  Top5: 0.667  Loss: 8.234
[300/350] Top1: 0.100  Top5: 0.633  Loss: 8.233
[330/350] Top1: 0.121  Top5: 0.636  Loss: 8.246
----------
Overall ==> Top1: 0.114  Top5: 0.614  Loss: 8.252  PPL: 3834.065


## Mixed model

```yaml
quantizers:
  post_train_quantizer:
    class: PostTrainLinearQuantizer
    bits_activations: 8
    bits_parameters: 4
    bits_accum: 16

    mode: ASYMMETRIC_UNSIGNED
   
    model_activation_stats: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
    per_channel_wts: True
    clip_acts: AVG

    overrides:
    # First and last layers in 8-bits
      conv1:
        bits_weights: 8
        bits_activations: 8
      fc:
        bits_weights: 8
        bits_activations: 8
        clip_acts: NONE  # Don't clip activations in last layer before softmax
```

In [14]:
# Mixed-precision run: switch to the fourth configuration file and re-evaluate.
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit4.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50/resnet50_imagenet_post_train_4bit4.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet/resnet50_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


7000 samples (20 per mini-batch)
[ 30/350] Top1: 0.000  Top5: 0.667  Loss: 8.109
[ 60/350] Top1: 0.000  Top5: 0.583  Loss: 8.031
[ 90/350] Top1: 0.000  Top5: 0.667  Loss: 8.069
[120/350] Top1: 0.000  Top5: 0.625  Loss: 8.046
[150/350] Top1: 0.000  Top5: 0.600  Loss: 8.051
[180/350] Top1: 0.000  Top5: 0.611  Loss: 8.055
[210/350] Top1: 0.024  Top5: 0.595  Loss: 8.086
[240/350] Top1: 0.021  Top5: 0.521  Loss: 8.086
[270/350] Top1: 0.037  Top5: 0.500  Loss: 8.095
[300/350] Top1: 0.033  Top5: 0.533  Loss: 8.090
[330/350] Top1: 0.030  Top5: 0.561  Loss: 8.086
----------
Overall ==> Top1: 0.029  Top5: 0.557  Loss: 8.093  PPL: 3270.066
