Skip to content


Browse files Browse the repository at this point in the history
  • Loading branch information
jasonustc committed Nov 8, 2019
1 parent 316c37d commit 05522aa
Show file tree
Hide file tree
Showing 19 changed files with 3,316 additions and 203 deletions.
402 changes: 201 additions & 201 deletions LICENSE

Large diffs are not rendered by default.

56 changes: 54 additions & 2 deletions
@@ -1,2 +1,54 @@
# alibabacloud-quantization-networks
# Quantization Networks

### Overview
This repository contains the training code of Quantization Networks introduced in our CVPR 2019 paper: *Quantization Networks*.

In this work, we provide a **simple and uniform way** for weights and activations quantization by formulating it as a differentiable non-linear function.
The quantization function is represented as a linear combination of several
Sigmoid functions with learnable biases and scales, which
can be learned in a lossless and end-to-end manner via
continuous relaxation of the steepness of the Sigmoid functions.

Extensive experiments on image classification and object
detection tasks show that our quantization networks outperform state-of-the-art methods.

### Run environment

+ Python 3.5
+ Python bindings for OpenCV
+ Pytorch 0.3.0

### Usage

Download the ImageNet dataset and decompress it into a directory structure like the following:


To train a weight quantization model of ResNet-18, simply run


After training, the resulting model will be stored in `./logs/quan-weight/resnet18-quan-w-1`.

Other training processes can be found in the paper.

### License
+ Apache License 2.0

### Citation
If you use our code or models in your research, please cite with:
title={Quantization Networks},
author={Yang, Jiwei and Shen, Xu and Xing, Jun and Tian, Xinmei and Li, Houqiang and Deng, Bing and Huang, Jianqiang and Hua, Xian-sheng},
192 changes: 192 additions & 0 deletions
@@ -0,0 +1,192 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# This file is used to quantize the weights of a model.

from __future__ import print_function, absolute_import

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.parameter import Parameter
import math
import numpy
import pdb

def sigmoid_t(x, b=0, t=1):
    """
    The sigmoid function with T for soft quantization function.

    Args:
        x: input (must be a torch tensor, since the scaled value is passed
            to ``torch.clamp``).
        b: the bias
        t: the temperature

    Returns:
        y = sigmoid(t(x-b)), with the exponent clamped to [-10, 10].
    """
    # Negated, temperature-scaled distance from the bias: sigmoid(z) is
    # computed below as 1 / (1 + exp(-z)).
    temp = -1 * t * (x - b)
    # Bound the exponent so exp() stays in a safe numeric range; as a
    # consequence the output saturates at sigmoid(+/-10) instead of 0 or 1.
    temp = torch.clamp(temp, min=-10.0, max=10.0)
    return 1.0 / (1.0 + torch.exp(temp))

def step(x, bias):
    """
    The step function for ideal quantization function in test stage.

    Args:
        x: input tensor.
        bias: the threshold at which the step switches from 0 to 1.

    Returns:
        A tensor shaped like ``x`` holding 1.0 where ``x - bias`` is
        positive and 0.0 elsewhere.
    """
    y = torch.zeros_like(x)
    # NOTE(review): the comparison call on this line was truncated in the
    # source ("mask = - bias, 0.0)"); reconstructed as a strict
    # greater-than test -- confirm whether x == bias should map to 0 or 1.
    mask = torch.gt(x - bias, 0.0)
    y[mask] = 1.0
    return y

class QuaOp(object):
    """
    Quantize weight.

    Args:
        model: the model to be quantified.
        QW_biases (list): the bias of quantization function.
            QW_biases is a list with m*n shape, m is the number of layers,
            n is the number of sigmoid_t.
        QW_values (list): the list of quantization values,
            such as [-1, 0, 1], [-2, -1, 0, 1, 2].

    Returns:
        Quantized model.
    """

    def __init__(self, model, QW_biases, QW_values=[]):
        # The [] default is kept for interface compatibility and is never
        # mutated here, but callers must pass a non-empty list: the
        # threshold below reads QW_values[-1].

        # Count the number of Conv2d and Linear
        count_targets = 0
        for m in model.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                count_targets = count_targets + 1
        # Omit the first conv layer and the last linear layer
        start_range = 1
        end_range = count_targets - 2
        # NOTE(review): the original numpy.linspace(...) statement ended in
        # a dangling line continuation; it enumerated the integers
        # start_range..end_range, which this range reproduces directly.
        self.bin_range = list(range(start_range, end_range + 1))
        self.num_of_params = len(self.bin_range)
        self.saved_params = []
        self.target_params = []
        self.target_modules = []
        index = -1
        for m in model.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                index = index + 1
                if index in self.bin_range:
                    # NOTE(review): this statement was truncated in the
                    # source ("tmp ="). Reconstructed from how the other
                    # methods use these lists: target_modules[i].data is the
                    # live weight, saved_params[i] its float backup.
                    tmp = m.weight.data.clone()
                    self.saved_params.append(tmp)
                    self.target_modules.append(m.weight)

        print('target_modules number: ', len(self.target_modules))
        self.QW_biases = QW_biases
        self.QW_values = QW_values
        # the number of sigmoid_t
        self.n = len(self.QW_values) - 1
        self.threshold = self.QW_values[-1] * 5 / 4.0
        # the gap between two quantization values
        self.scales = []
        offset = 0.
        for i in range(self.n):
            gap = self.QW_values[i + 1] - self.QW_values[i]
            # forward()/backward() index self.scales[j], so every gap must
            # be recorded here.
            self.scales.append(gap)
            offset += gap
        # Half of the total span; subtracted in forward() to shift the
        # summed steps down by half their total height.
        self.offset = offset / 2.

    def forward(self, x, T, quan_bias, train=True):
        """
        Apply the quantization function to x.

        Args:
            x: the (already scaled) input tensor.
            T: the temperature passed to sigmoid_t (training only).
            quan_bias: the biases of the n sigmoid/step units.
            train: use the soft sigmoid form when True, the hard step
                form otherwise.

        Returns:
            The sum of the n scaled units minus self.offset.
        """
        if train:
            y = sigmoid_t(x, b=quan_bias[0], t=T)*self.scales[0]
            for j in range(1, self.n):
                y += sigmoid_t(x, b=quan_bias[j], t=T)*self.scales[j]
        else:
            # Without this branch the soft result above would always be
            # overwritten by the hard step result.
            y = step(x, bias=quan_bias[0])*self.scales[0]
            for j in range(1, self.n):
                y += step(x, bias=quan_bias[j])*self.scales[j]
        y = y - self.offset

        return y

    def backward(self, x, T, quan_bias):
        """
        Derivative of the soft quantization function w.r.t. its scaled
        argument.

        For each unit y_j = s_j * sigmoid(.), this accumulates
        y_j * (s_j - y_j) / s_j = s_j * sigmoid * (1 - sigmoid).
        The temperature factor of the chain rule is not applied here;
        the caller multiplies the result by T.
        """
        y_1 = sigmoid_t(x, b=quan_bias[0], t=T)*self.scales[0]
        y_grad = (y_1.mul(self.scales[0] - y_1)).div(self.scales[0])
        for j in range(1, self.n):
            y_temp = sigmoid_t(x, b=quan_bias[j], t=T)*self.scales[j]
            y_grad += (y_temp.mul(self.scales[j] - y_temp)).div(self.scales[j])

        return y_grad

    def quantization(self, T, alpha, beta, init, train_phase=True):
        """
        The operation of network quantization.

        Args:
            T: the temperature, a single number.
            alpha: the scale factor of the output, a list.
            beta: the scale factor of the input, a list.
            init: a flag represents the first loading of the quantization function.
            train_phase: a flag represents the quantization
                operation in the training stage.
        """
        self.save_params()
        self.quantizeConvParams(T, alpha, beta, init, train_phase=train_phase)

    def save_params(self):
        """
        save the float parameters for backward
        """
        for index in range(self.num_of_params):
            # NOTE(review): loop body was missing in the source;
            # reconstructed as an in-place copy of the live weights into
            # the backup buffers -- confirm.
            self.saved_params[index].copy_(self.target_modules[index].data)

    def restore_params(self):
        """
        restore the float parameters saved by save_params
        """
        for index in range(self.num_of_params):
            # NOTE(review): loop body was missing in the source;
            # reconstructed as the inverse of save_params -- confirm.
            self.target_modules[index].data.copy_(self.saved_params[index])

    def quantizeConvParams(self, T, alpha, beta, init, train_phase):
        """
        quantize the parameters in forward

        Each target weight w is replaced in place by
        alpha * Q(beta * w), where Q is self.forward.
        """
        # Cap the temperature at 2000.
        T = (T > 2000)*2000 + (T <= 2000)*T
        for index in range(self.num_of_params):
            if init:
                # First load: pick beta so the largest |w| maps onto
                # self.threshold, and alpha as its reciprocal.
                # This path calls .cuda() and therefore needs a CUDA device.
                beta[index].data = torch.Tensor([self.threshold / self.target_modules[index].data.abs().max()]).cuda()
                alpha[index].data = torch.reciprocal(beta[index].data)
            # scale w
            x = self.target_modules[index].data.mul(beta[index].data)

            y = self.forward(x, T, self.QW_biases[index], train=train_phase)
            #scale w^hat
            self.target_modules[index].data = y.mul(alpha[index].data)

    def updateQuaGradWeight(self, T, alpha, beta, init):
        """
        Calculate the gradients of all the parameters.
        The gradients of model parameters are saved in the [Variable]

        Args:
            T: the temperature, a single number.
            alpha: the scale factor of the output, a list.
            beta: the scale factor of the input, a list.
            init: a flag represents the first loading of the quantization function.

        Returns:
            alpha_grad: the gradient of alpha.
            beta_grad: the gradient of beta.
        """
        beta_grad = [0.0] * len(beta)
        alpha_grad = [0.0] * len(alpha)
        # Cap the temperature at 2000.
        T = (T > 2000)*2000 + (T <= 2000)*T
        for index in range(self.num_of_params):
            if init:
                # This path calls .cuda() and therefore needs a CUDA device.
                beta[index].data = torch.Tensor([self.threshold / self.target_modules[index].data.abs().max()]).cuda()
                alpha[index].data = torch.reciprocal(beta[index].data)
            x = self.target_modules[index].data.mul(beta[index].data)

            # set T = 1 when train binary model
            y_grad = self.backward(x, 1, self.QW_biases[index]).mul(T)
            # set T = T when train the other quantization model
            #y_grad = self.backward(x, T, self.QW_biases[index]).mul(T)

            # NOTE(review): the three statements below ended in dangling
            # line continuations in the source. With w_hat = alpha*Q(beta*w)
            # and g = dL/dw_hat, the chain rule gives
            #   dL/dbeta  = sum(Q' * w * alpha * g)
            #   dL/dalpha = sum(Q(beta*w) * g)
            #   dL/dw     = Q' * beta * alpha * g
            # whose visible prefixes match the original lines; the trailing
            # "* g" / ".sum()" parts are reconstructed -- confirm. This
            # assumes the parameter's .grad currently holds g.
            weight_grad = self.target_modules[index].grad.data
            beta_grad[index] = y_grad.mul(self.target_modules[index].data).mul(alpha[index].data).\
                mul(weight_grad).sum()
            alpha_grad[index] = self.forward(x, T, self.QW_biases[index]).\
                mul(weight_grad).sum()

            # Write the result into the parameter's gradient; rebinding
            # self.target_modules[index] itself would drop the reference to
            # the model parameter.
            self.target_modules[index].grad.data = y_grad.mul(beta[index].data).mul(alpha[index].data).\
                mul(weight_grad)
        return alpha_grad, beta_grad

30 changes: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os.path

# gets home dir cross platform
HOME = os.path.expanduser("~")

# Quantization values per experiment key.
QW_values = {
    'alexnet-w-1': [-1, 0, 1],
    'alexnet-w-2': [-1, 0, 1],
    'alexnet-w-3-pm2': [-2, -1, 0, 1, 2],
    'alexnet-w-3-pm4': [-4, -2, -1, 0, 1, 2, 4],
    'resnet18-w-1': [-1, 0, 1],
    'resnet18-w-2': [-1, 0, 1],
    'resnet18-w-3': [-4, -2, -1, 0, 1, 2, 4],
}

# Weight-quantization biases per experiment key: one inner list per
# quantized layer.
# NOTE(review): only the two keys below are visible; entries for the other
# QW_values keys appear to be missing here -- confirm against the full file.
QW_biases = {
    'alexnet-w-1': [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]],

    'resnet18-w-1': [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
                     [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
                     [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
                     [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]],
}

# Activation-quantization biases per experiment key.
QA_biases = {
    'resnet18-a-1': [[0.05], [0.05], [0.05], [0.05], [0.05], [0.05], [0.05], [0.05], [0.05], [0.05], [0.05], [0.05], [0.05], [0.05], [0.05],
                     [0.05], [0.05]],
}


0 comments on commit 05522aa

Please sign in to comment.