# ResNet kvantizácia

In [1]:
import torch
import torchvision
import distiller
from distiller.models import create_model


print(torch.__version__)
print(torchvision.__version__)

1.3.1
0.4.2


In [2]:
model = create_model(pretrained=True,dataset='imagenet',arch='resnet18') 

In [3]:
DATASET_PATH = "/home/bohumil/FIIT/BP/BP/Zdroje_kod/imagenet"
val_images = DATASET_PATH + "/val/images"

# z <distiller_root>/jupyter/post_train_quant_convert_pytorch.ipynb
distiller.set_seed(0)
subset_size = 1.0
val_split = 0.8
batch_size_gpu = 30
workers_gpu = 1

# use train for STATS and val for EVAL 
train_loader_gpu, val_loader_gpu , test_loader_gpu, _ = distiller.apputils.load_data('imagenet', DATASET_PATH, batch_size=batch_size_gpu, workers=workers_gpu, 
                             validation_split=val_split, fixed_subset=False, sequential=False, 
                             test_only=False)

In [4]:
# z <distiller_root>/jupyter/post_train_quant_convert_pytorch.ipynb
import torchnet as tnt
import math
import numpy as np


def eval_model(data_loader, model, device='cpu', print_freq=10):
    # print('Evaluation model ', model.arch)
    
    criterion = torch.nn.CrossEntropyLoss().to(device)
    
    loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True, topk=(1, 5))
    # apmeter = tnt.meter.APMeter()

    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    total_steps = math.ceil(total_samples / batch_size)
    print('{0} samples ({1} per mini-batch)'.format(total_samples, batch_size))

    # Switch to evaluation mode
    model.eval()

    for step, (inputs, target) in enumerate(data_loader):
        with torch.no_grad():
            inputs, target = inputs.to(device), target.to(device)
            # compute output from model
            output = model(inputs)

            # compute loss and measure accuracy
            loss.add(criterion(output, target).item())
            classerr.add(output.data, target)

            if (step + 1) % print_freq == 0:
                print('[{:3d}/{:3d}] Top1: {:.3f}  Top5: {:.3f}  Loss: {:.3f}'.format(
                      step + 1, total_steps, classerr.value(1), classerr.value(5), loss.mean), flush=True)
    print('----------')
    print('Overall ==> Top1: {:.3f}  Top5: {:.3f}  Loss: {:.3f}  PPL: {:.3f}'.format(
        classerr.value(1), classerr.value(5), loss.mean, np.exp(loss.mean)), flush=True)
    # print(apmeter.value())


def eval_model2(data_loader, model, criterion, device, print_freq = 30):
    model.eval()
    loss = AverageMeter()
    acc1 = AverageMeter()
    acc5 = AverageMeter()
    total_steps = math.ceil(data_loader.samles / data_loader.batch_size)
    
    for step, (inputs, target) in enumerate(data_loader):
        with torch.no_grad():
            target = target.to(device)
            inputs = inputs.to(device)
            
            output=model(inputs)
            loss = criterion(output, target)
            
            acc1, acc5 = accuracy(output.detach(),target, topk=(1,5))
            loss.update(float(loss),inputs.size(0))
            top1.update(float(acc1), inputs.size(0))
            top5.update(float(acc5), inputs.size(0))
            
            if (step + 1) % print_freq == 0:
                print('[{:3d}/{:3d}] Top1: {:.3f}  Top5: {:.3f}  Loss: {:.3f}'.format(
                      step + 1, total_steps, acc1, acc5, loss.mean), flush=True)
    print('----------')
    print('Overall ==> Top1: {:.3f}  Top5: {:.3f}  Loss: {:.3f}  PPL: {:.3f}'.format(
        acc1, acc5, loss.mean, np.exp(loss.mean), flush=True))
    model.train()
    return loss.avg, acc1.avg, acc5.avg
            

In [5]:
import logging
def config_notebooks_logger():
    logging.config.fileConfig('logging.conf')
    msglogger = logging.getLogger()
    msglogger.info('Logging configured successfully')
    return msglogger

In [6]:
import argparse

msglogger = config_notebooks_logger()

parser = argparse.ArgumentParser()
distiller.quantization.add_post_train_quant_args(parser)
args = parser.parse_args(args= [])
# args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train.yaml'

Logging configured successfully


Correct way of getting statistics

In [7]:
cpu_model = distiller.make_non_parallel_copy(model)

In [None]:
from distiller.data_loggers import collect_quant_stats, QuantCalibrationStatsCollector, collector_context


args.qe_calibration = 0.2
if args.qe_calibration:
    
    cpu_model = distiller.make_non_parallel_copy(model).cpu()
    
    distiller.utils.assign_layer_fq_names(cpu_model)
    msglogger.info("Generating quantization calibration stats based on {0} users".format(args.qe_calibration))
    collector = distiller.data_loggers.QuantCalibrationStatsCollector(cpu_model)
    with collector_context(collector):
        eval_model(train_loader_gpu,cpu_model,'cuda',print_freq=30)
        # Here call your model evaluation function, making sure to execute only
        # the portion of the dataset specified by the qe_calibration argument
    yaml_path = './act_quantization_stats.yaml'
    collector.save(yaml_path)

In [34]:
if torch.cuda.is_available():
    %time eval_model(train_loader_gpu,model,'cuda')

3888 samples (30 per mini-batch)
[ 10/130] Top1: 0.000  Top5: 0.667  Loss: 15.329
[ 20/130] Top1: 0.000  Top5: 0.667  Loss: 15.252
[ 30/130] Top1: 0.222  Top5: 1.000  Loss: 15.115
[ 40/130] Top1: 0.250  Top5: 1.417  Loss: 15.082
[ 50/130] Top1: 0.400  Top5: 1.667  Loss: 15.100
[ 60/130] Top1: 0.333  Top5: 1.611  Loss: 15.091
[ 70/130] Top1: 0.333  Top5: 1.524  Loss: 15.070
[ 80/130] Top1: 0.292  Top5: 1.625  Loss: 15.041
[ 90/130] Top1: 0.259  Top5: 1.556  Loss: 15.087
[100/130] Top1: 0.233  Top5: 1.567  Loss: 15.068
[110/130] Top1: 0.242  Top5: 1.667  Loss: 15.037
[120/130] Top1: 0.278  Top5: 1.611  Loss: 15.048
[130/130] Top1: 0.257  Top5: 1.595  Loss: 15.042
----------
Overall ==> Top1: 0.257  Top5: 1.595  Loss: 15.042 PPL: 3409551.409
CPU times: user 58.7 s, sys: 565 ms, total: 59.2 s
Wall time: 56.5 s


In [10]:
args.quantize_eval = True

In [11]:
from copy import deepcopy
def eval_quantized(model, args):
    if args.quantize_eval:
        quantizer = distiller.quantization.PostTrainLinearQuantizer.from_args(deepcopy(model), args)
        # dummy = distiller.get_dummy_input(model.input_shape)
        dummy = distiller.get_dummy_input(input_shape=model.input_shape)
        quantizer.prepare_model(dummy)
        eval_model2(train_loader_gpu, quantizer.model, 'cuda', print_freq=30)

In [36]:
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


3888 samples (30 per mini-batch)
[ 30/130] Top1: 0.111  Top5: 0.333  Loss: 7.483
[ 60/130] Top1: 0.111  Top5: 0.389  Loss: 7.492
[ 90/130] Top1: 0.111  Top5: 0.370  Loss: 7.504
[120/130] Top1: 0.139  Top5: 0.500  Loss: 7.492
----------
Overall ==> Top1: 0.129  Top5: 0.463  Loss: 7.492 PPL: 1794.262


In [13]:
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train2.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train2.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


3888 samples (30 per mini-batch)
[ 30/130] Top1: 0.333  Top5: 1.111  Loss: 10.886
[ 60/130] Top1: 0.222  Top5: 1.111  Loss: 10.891
[ 90/130] Top1: 0.296  Top5: 1.370  Loss: 10.816
[120/130] Top1: 0.333  Top5: 1.528  Loss: 10.803
----------
Overall ==> Top1: 0.334  Top5: 1.517  Loss: 10.798 PPL: 48925.403


In [14]:
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train3.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train3.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer
Applying batch-norm folding ahead of post-training quantization
Propagating output statistics from BN modules to folded modules
Optimizing output statistics for modules followed by ReLU/Tanh/Sigmoid
Updated stats saved to ./quant_stats_after_prepare_model.yaml
Per-layer quantization parameters saved to ./layer_quant_params.yaml


3888 samples (30 per mini-batch)
[ 30/130] Top1: 0.222  Top5: 1.556  Loss: 12.115
[ 60/130] Top1: 0.278  Top5: 1.500  Loss: 12.083
[ 90/130] Top1: 0.370  Top5: 1.593  Loss: 12.022
[120/130] Top1: 0.361  Top5: 1.722  Loss: 12.021
----------
Overall ==> Top1: 0.386  Top5: 1.698  Loss: 12.038 PPL: 168981.175


In [12]:
args.qe_config_file = '/home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train3.yaml'
eval_quantized(model, args)

Reading configuration from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/resnet18_imagenet_post_train3.yaml
Found component of class PostTrainLinearQuantizer: Name: post_train_quantizer ; Section: quantizers
Loading activation stats from: /home/bohumil/FIIT/BP/BP/Zdroje_kod/quantization_jupyters/acts_quantization_stats.yaml
Preparing model for quantization using PostTrainLinearQuantizer


RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 1.96 GiB total capacity; 160.49 MiB already allocated; 9.25 MiB free; 13.51 MiB cached)

In [51]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


def evaluate(model, criterion, data_loader, neval_batches):
    model.eval()
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    cnt = 0
    with torch.no_grad():
        for image, target in data_loader:
            output = model(image)
            loss = criterion(output, target)
            cnt += 1
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            print('.', end = '')
            top1.update(acc1[0], image.size(0))
            top5.update(acc5[0], image.size(0))
            if cnt >= neval_batches:
                 return top1, top5

    return top1, top5

In [53]:
num_eval_batches = 2
top1, top5 = evaluate(distiller.make_non_parallel_copy(model).cpu(), torch.nn.CrossEntropyLoss(), train_loader_gpu, neval_batches=20)
print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * batch_size_gpu, top1.avg))

....................Evaluation accuracy on 60 images, 0.33
