# ResNet kvantizácia

In [1]:
import torch
import torchvision
import distiller
from distiller.models import create_model


# Record the library versions this notebook was executed with (one per line).
print(torch.__version__, torchvision.__version__, sep='\n')

1.3.1
0.4.2


In [2]:
# Build the pretrained ResNet-18 for the 'imagenet' dataset via
# distiller.models.create_model.
model = create_model(
    pretrained=True,
    dataset='imagenet',
    arch='resnet18',
)

In [3]:
# Bare last expression: the notebook displays the model's repr (its module tree).
model

DataParallel(
  (module): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): DistillerBasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (add): EltwiseAdd()
        (relu2): ReLU(inplace=True)
      )
      (1): DistillerBasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bia

In [4]:
from torchvision import transforms
# from torch.utils.data import DataLoader

# --- Evaluation data --------------------------------------------------------
# Tunables live together so the notebook can be pointed at another copy of the
# data, or at a different subset size, without editing the pipeline below.
IMAGENET_TRAIN_DIR = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/imagenet/train'
EVAL_SUBSET_SIZE = 7000   # number of images drawn into the evaluation subset
SPLIT_SEED = 0            # fixes WHICH images are drawn (see manual_seed below)

batch_size = 40
num_workers = 1

# Per-channel normalisation constants (presumably the usual ImageNet
# statistics -- confirm against the pretrained model's training recipe).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
preprocessing = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

dataset = torchvision.datasets.ImageFolder(IMAGENET_TRAIN_DIR, preprocessing)

# Never ask for more images than the folder holds; random_split requires the
# requested lengths to add up to len(dataset).
_subset_size = min(EVAL_SUBSET_SIZE, len(dataset))

# random_split permutes indices with torch's global RNG. Seeding right before
# the call makes `small` contain the same images after every kernel restart,
# so the accuracy figures of the individual runs are comparable.
torch.manual_seed(SPLIT_SEED)
small, big = torch.utils.data.random_split(
    dataset, [_subset_size, len(dataset) - _subset_size])

dataloader = torch.utils.data.DataLoader(
    small,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=True,
)


In [5]:
from resnet_output import resnet_output

def target_labels(dataset, target):
    """Translate dataset class indices into values of the ``resnet_output`` mapping.

    Every index in ``target`` is first resolved to a class name through
    ``dataset.classes``; that name is then looked up in the module-level
    ``resnet_output`` mapping (imported from the ``resnet_output`` module).

    Args:
        dataset: object exposing a ``classes`` sequence that can be indexed by
            the integer values stored in ``target``.
        target: tensor of class indices; only its ``tolist()`` method is used.

    Returns:
        torch.LongTensor: one looked-up value per element of ``target``, in the
        same order. The tensor is created on the CPU.

    Raises:
        IndexError: if an index is outside ``dataset.classes``.
        KeyError: if a class name is missing from ``resnet_output``.
    """
    class_names = dataset.classes
    # Built with a comprehension instead of overwriting a local called `list`,
    # which used to shadow the builtin inside this function.
    mapped = [resnet_output[class_names[index]] for index in target.tolist()]
    return torch.LongTensor(mapped)

In [6]:
# z <distiller_root>/jupyter/post_train_quant_convert_pytorch.ipynb
import torchnet as tnt
import math
import numpy as np


def eval_model(data_loader, model, device='cpu', print_freq=10):
    """Evaluate ``model`` on ``data_loader`` and print top-1/top-5 accuracy and loss.

    The labels yielded by the loader are remapped with
    ``target_labels(dataset, target)`` before they are compared with the
    model output.

    NOTE: the remapping reads the notebook-level ``dataset`` variable, not
    ``data_loader.dataset``. The loader must therefore yield class indices of
    that same ``dataset``.

    Args:
        data_loader: loader yielding ``(inputs, target)`` batches; it must
            expose ``sampler`` and ``batch_size``.
        model: module to evaluate. It is switched to eval mode but NOT moved;
            it must already live on ``device``.
        device: device the inputs, remapped targets and loss are moved to.
        print_freq: a progress line is printed every ``print_freq`` batches.

    Returns:
        None. Results are printed only.
    """
    criterion = torch.nn.CrossEntropyLoss().to(device)

    loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True, topk=(1, 5))

    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    # The loader knows its own number of batches (this also stays correct when
    # it was built with drop_last=True).
    total_steps = len(data_loader)
    print('{0} samples ({1} per mini-batch)'.format(total_samples, batch_size))

    # Switch to evaluation mode
    model.eval()

    # A single no_grad scope around the loop instead of re-entering it per batch.
    with torch.no_grad():
        for step, (inputs, target) in enumerate(data_loader, start=1):
            inputs = inputs.to(device)
            # compute output from model
            output = model(inputs)
            # target_labels() converts via tolist() and returns a CPU tensor,
            # so the raw target is remapped first and moved to the device once
            # (it used to be copied to the device, pulled back, and copied again).
            target = target_labels(dataset, target).to(device)
            # compute loss and measure accuracy
            loss.add(criterion(output, target).item())
            classerr.add(output, target)

            if step % print_freq == 0:
                print('[{:3d}/{:3d}] Top1: {:.3f}  Top5: {:.3f}  Loss: {:.3f}'.format(
                      step, total_steps, classerr.value(1), classerr.value(5), loss.mean), flush=True)
    print('----------')
    print('Overall ==> Top1: {:.3f}  Top5: {:.3f}  Loss: {:.3f}  PPL: {:.3f}'.format(
        classerr.value(1), classerr.value(5), loss.mean, np.exp(loss.mean)), flush=True)

In [11]:
import logging
def config_notebooks_logger(config_path='logging.conf'):
    """Configure the ``logging`` package from an INI-style file and return the root logger.

    Args:
        config_path: path of the configuration file handed to
            ``logging.config.fileConfig``. Defaults to ``'logging.conf'``,
            resolved against the current working directory.

    Returns:
        logging.Logger: the root logger, after an info message confirming the
        configuration has been emitted through it.
    """
    # `import logging` alone does not load the `logging.config` submodule; the
    # attribute access below only worked when another package had already
    # imported it. Import it explicitly so the function is self-sufficient.
    import logging.config

    logging.config.fileConfig(config_path)
    msglogger = logging.getLogger()
    msglogger.info('Logging configured successfully')
    return msglogger

In [12]:
import argparse
import distiller

# Set up logging before touching Distiller so its messages have a destination.
msglogger = config_notebooks_logger()

# Collect Distiller's post-training-quantization options in an argparse
# namespace. Parsing an explicit empty argument list (rather than sys.argv)
# leaves every option at its default value.
parser = argparse.ArgumentParser()
distiller.quantization.add_post_train_quant_args(parser)
args = parser.parse_args([])
# args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train.yaml'

Logging configured successfully


Correct way of getting statistics

In [13]:
# Copy of `model` returned by distiller.make_non_parallel_copy. Judging by the
# helper's name this drops the DataParallel wrapper -- TODO confirm in the
# Distiller documentation.
cpu_model = distiller.make_non_parallel_copy(model)

In [None]:
from distiller.data_loggers import collect_quant_stats, QuantCalibrationStatsCollector, collector_context


# Share of the evaluation subset that is fed through the model while the
# activation statistics are being collected.
args.qe_calibration = 0.2
if args.qe_calibration:

    # Statistics are gathered on a non-parallel copy that is moved to the CPU.
    cpu_model = distiller.make_non_parallel_copy(model).cpu()

    distiller.utils.assign_layer_fq_names(cpu_model)
    msglogger.info("Generating quantization calibration stats based on {0} users".format(args.qe_calibration))
    collector = distiller.data_loggers.QuantCalibrationStatsCollector(cpu_model)

    # Evaluate only the leading `qe_calibration` fraction of `small`. This cell
    # previously referenced `train_loader_gpu`, which is not defined in the
    # notebook, so it failed on a fresh kernel.
    calib_size = max(1, int(len(small) * args.qe_calibration))
    calib_loader = torch.utils.data.DataLoader(
        torch.utils.data.Subset(small, range(calib_size)),
        batch_size=batch_size,
        num_workers=num_workers,
    )

    with collector_context(collector):
        # eval_model() moves the inputs to the given device but leaves the
        # model where it is, so the device must be the one `cpu_model` lives on.
        eval_model(calib_loader, cpu_model, 'cpu', print_freq=30)
    # NOTE(review): the quantizer logs further down show the stats being loaded
    # from '.../acts_quantization_stats.yaml' ("acts", with an s). The file
    # written here is named differently -- confirm which name is intended.
    yaml_path = './act_quantization_stats.yaml'
    collector.save(yaml_path)

In [13]:
# Baseline: time one evaluation pass of the model as loaded (before any
# quantizer has been applied). Skipped entirely when CUDA is unavailable.
if torch.cuda.is_available():
    %time eval_model(dataloader,model,'cuda', print_freq=40)

7000 samples (40 per mini-batch)
[ 40/175] Top1: 79.375  Top5: 93.750  Loss: 0.807
[ 80/175] Top1: 79.062  Top5: 94.188  Loss: 0.816
[120/175] Top1: 78.167  Top5: 93.854  Loss: 0.845
[160/175] Top1: 78.500  Top5: 93.828  Loss: 0.838
----------
Overall ==> Top1: 78.471  Top5: 93.857  Loss: 0.843  PPL: 2.323
CPU times: user 1min 27s, sys: 910 ms, total: 1min 28s
Wall time: 1min 26s


In [16]:
# eval_quantized() does its work only when this flag is truthy.
args.quantize_eval = True

In [17]:
from copy import deepcopy
def eval_quantized(model, args):
    """Post-train-quantize a deep copy of ``model`` and evaluate the result.

    Nothing happens unless ``args.quantize_eval`` is truthy. The caller's
    ``model`` is left untouched: the quantizer receives a ``deepcopy``. The
    quantized network is evaluated with ``eval_model`` on the notebook-level
    ``dataloader`` using the ``'cuda'`` device.

    Args:
        model: network to quantize; must expose ``input_shape``.
        args: namespace consumed by ``PostTrainLinearQuantizer.from_args``;
            its ``quantize_eval`` attribute gates the whole function.

    Returns:
        None.
    """
    if not args.quantize_eval:
        return

    model_copy = deepcopy(model)
    quantizer = distiller.quantization.PostTrainLinearQuantizer.from_args(model_copy, args)

    # prepare_model() is handed a dummy input built from the model's input shape.
    dummy_input = distiller.get_dummy_input(input_shape=model.input_shape)
    quantizer.prepare_model(dummy_input)

    eval_model(dataloader, quantizer.model, 'cuda', print_freq=30)

In [17]:
# First quantized run: point the quantizer at the base post-training config
# file and evaluate a quantized copy of `model`.
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


7000 samples (40 per mini-batch)
[ 30/175] Top1: 79.000  Top5: 93.750  Loss: 0.824
[ 60/175] Top1: 79.042  Top5: 93.917  Loss: 0.815
[ 90/175] Top1: 79.111  Top5: 94.250  Loss: 0.809
[120/175] Top1: 78.562  Top5: 93.875  Loss: 0.843
[150/175] Top1: 78.467  Top5: 93.767  Loss: 0.849
----------
Overall ==> Top1: 78.414  Top5: 93.771  Loss: 0.847  PPL: 2.333


4-bit quantization

## Run 1
```yaml
class: PostTrainLinearQuantizer
bits_activations: 4
bits_parameters: 4
bits_accum: 16
mode: ASYMMETRIC_UNSIGNED
per_channel_wts: True
clip_acts: AVG

```

In [18]:
# Run 1: evaluate with the "_4bit" config file (the notes above list 4-bit
# activations and 4-bit parameters for this run).
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train_4bit.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train_4bit.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


7000 samples (40 per mini-batch)
[ 30/175] Top1: 27.583  Top5: 50.333  Loss: 3.869
[ 60/175] Top1: 29.000  Top5: 51.542  Loss: 3.754
[ 90/175] Top1: 28.278  Top5: 51.528  Loss: 3.791
[120/175] Top1: 27.812  Top5: 50.917  Loss: 3.795
[150/175] Top1: 27.567  Top5: 51.033  Loss: 3.790
----------
Overall ==> Top1: 27.586  Top5: 51.000  Loss: 3.780  PPL: 43.807


## Úprava parametrov
## Run 2
Config file: `/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train_4bit2.yaml`

```yaml
quantizers:
  post_train_quantizer:
    class: PostTrainLinearQuantizer
    bits_activations: 4
    bits_parameters: 4
    bits_accum: 16

    mode: ASYMMETRIC_UNSIGNED
    
    model_activation_stats: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
    per_channel_wts: True
    clip_acts: AVG

    overrides:
      fc:
        clip_acts: NONE  # Don't clip activations in last layer before softmax
```

In [19]:
# Run 2: evaluate with the "_4bit2" config file (per the notes above, 4-bit
# with activation clipping switched off for the `fc` layer).
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train_4bit2.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train_4bit2.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


7000 samples (40 per mini-batch)
[ 30/175] Top1: 23.333  Top5: 44.583  Loss: 4.315
[ 60/175] Top1: 23.750  Top5: 43.875  Loss: 4.254
[ 90/175] Top1: 23.333  Top5: 43.889  Loss: 4.265
[120/175] Top1: 23.188  Top5: 44.208  Loss: 4.249
[150/175] Top1: 23.267  Top5: 44.183  Loss: 4.244
----------
Overall ==> Top1: 23.357  Top5: 44.457  Loss: 4.234  PPL: 69.003


In [None]:
```yaml
 overrides:
    # First and last layers in 8-bits
      conv1:
        bits_weights: 8
        bits_activations: 8
      fc:
        bits_weights: 8
        bits_activations: 8
        clip_acts: NONE  # Don't clip activations in last layer before softmax
```

In [20]:
# Run 3: evaluate with the "_4bit3" config file (the overrides listed just
# above put `conv1` and `fc` at 8 bits).
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train_4bit3.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train_4bit3.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


7000 samples (40 per mini-batch)
[ 30/175] Top1: 0.083  Top5: 1.000  Loss: 8.946
[ 60/175] Top1: 0.125  Top5: 1.000  Loss: 9.030
[ 90/175] Top1: 0.111  Top5: 1.000  Loss: 9.069
[120/175] Top1: 0.146  Top5: 1.062  Loss: 9.082
[150/175] Top1: 0.167  Top5: 1.000  Loss: 9.073
----------
Overall ==> Top1: 0.171  Top5: 0.957  Loss: 9.088  PPL: 8852.849


## Mixed model

```yaml
quantizers:
  post_train_quantizer:
    class: PostTrainLinearQuantizer
    bits_activations: 8
    bits_parameters: 4
    bits_accum: 16

    mode: ASYMMETRIC_UNSIGNED
   
    model_activation_stats: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
    per_channel_wts: True
    clip_acts: AVG

    overrides:
    # First and last layers in 8-bits
      conv1:
        bits_weights: 8
        bits_activations: 8
      fc:
        bits_weights: 8
        bits_activations: 8
        clip_acts: NONE  # Don't clip activations in last layer before softmax
```

In [18]:
# Mixed run: evaluate with the "_4bit4" config file (per the notes above,
# 8-bit activations with 4-bit parameters, `conv1` and `fc` overridden to 8 bits).
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train_4bit4.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train_4bit4.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


7000 samples (40 per mini-batch)
[ 30/175] Top1: 0.167  Top5: 1.083  Loss: 9.294
[ 60/175] Top1: 0.083  Top5: 1.125  Loss: 9.334
[ 90/175] Top1: 0.194  Top5: 1.472  Loss: 9.317
[120/175] Top1: 0.187  Top5: 1.458  Loss: 9.319
[150/175] Top1: 0.167  Top5: 1.333  Loss: 9.327
----------
Overall ==> Top1: 0.171  Top5: 1.300  Loss: 9.336  PPL: 11342.605


In [None]:
class AverageMeter(object):
    """Running tracker that keeps the latest value and the weighted mean so far.

    Attributes:
        name: label printed by ``__str__``.
        fmt: format-spec suffix (for example ``':6.2f'``) applied to both the
            latest value and the mean when the meter is rendered.
        val: most recent value passed to ``update``.
        sum: weighted sum of all values seen since the last ``reset``.
        count: total weight (number of samples) seen since the last ``reset``.
        avg: ``sum / count``.
    """

    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        """Clear every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the newest observation, weighted as ``n`` samples."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

    def __str__(self):
        # Doubled braces survive the first format() call as literal braces, so
        # for fmt=':6.2f' the template is '{name} {val:6.2f} ({avg:6.2f})'.
        template = '{{name}} {{val{0}}} ({{avg{0}}})'.format(self.fmt)
        return template.format(**vars(self))


def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for each requested ``k``.

    Args:
        output: score tensor indexed as ``[sample, class]``; ``topk`` is taken
            along dimension 1.
        target: tensor with one class index per sample; its first dimension is
            used as the batch size.
        topk: iterable of ``k`` values to evaluate.

    Returns:
        list: one single-element tensor per entry of ``topk``, holding the
        percentage of samples whose target is among the ``k`` highest scores.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk highest scores per sample, transposed so that
        # row r holds every sample's (r+1)-th best guess.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # `correct` derives from a transposed tensor and may be
            # non-contiguous, in which case view(-1) raises for k > 1 on newer
            # PyTorch releases. reshape(-1) flattens in either case.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


def evaluate(model, criterion, data_loader, neval_batches):
    """Measure top-1/top-5 accuracy of ``model`` on the first batches of ``data_loader``.

    One ``.`` is printed per processed batch. The targets are used exactly as
    the loader yields them (unlike ``eval_model`` in this notebook, which
    remaps them through ``target_labels`` first).

    Args:
        model: module to evaluate; it is put into eval mode and called on the
            images as yielded, with no device transfer.
        criterion: accepted for interface compatibility only. The loss it
            produced was never used, so it is no longer computed.
        data_loader: iterable of ``(image, target)`` batches.
        neval_batches: stop after this many batches. At least one batch is
            always processed when the loader is non-empty.

    Returns:
        tuple: ``(top1, top5)`` ``AverageMeter`` objects holding the running
        accuracies, weighted by batch size.
    """
    model.eval()
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    with torch.no_grad():
        for batch_idx, (image, target) in enumerate(data_loader, start=1):
            output = model(image)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            print('.', end = '')
            top1.update(acc1[0], image.size(0))
            top5.update(acc5[0], image.size(0))
            if batch_idx >= neval_batches:
                break

    return top1, top5

In [None]:
# Quick check on a non-parallel CPU copy of the model, limited to the first
# `num_eval_batches` batches of `dataloader`.
# Previously this cell declared num_eval_batches = 2 but evaluated 20 batches,
# and it referenced `train_loader_gpu` / `batch_size_gpu`, neither of which is
# defined in the notebook. The constant now drives the call, and the image
# count is read from the meter, so the printed number matches what was evaluated.
# NOTE(review): evaluate() compares against the loader's raw labels, whereas
# eval_model() remaps them with target_labels(); confirm the raw labels match
# the model's output indices before trusting this figure.
num_eval_batches = 20
top1, top5 = evaluate(distiller.make_non_parallel_copy(model).cpu(), torch.nn.CrossEntropyLoss(), dataloader, neval_batches=num_eval_batches)
print('Evaluation accuracy on %d images, %2.2f'%(top1.count, top1.avg))