In [1]:
import numpy as np 
import tensorflow as tf 
from tensorflow import keras 
from tensorflow.keras.datasets import mnist 
from tensorflow.keras.models import Model 
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten 
from tensorflow.keras.layers import Conv2D, MaxPooling2D, ReLU  
from tensorflow.keras import backend as K 
from tensorflow.keras.callbacks import TensorBoard 
from tensorflow.contrib.quantize import experimental_create_training_graph 

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Load the MNIST data and append a trailing axis to the image arrays; the
# model defined later declares its input as shape (28, 28, 1).
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train[..., np.newaxis], x_test[..., np.newaxis]

In [3]:
# Preprocess the images: cast to float32 and divide every pixel value by 255.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Print the training array shape and the number of samples in each split.
print('x_train shape:', x_train.shape)
for split_name, images in (('train', x_train), ('test', x_test)):
    print(images.shape[0], split_name + ' samples')

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [4]:
# Convert the class-label vectors of both splits into binary class matrices
# with 10 classes.
y_train, y_test = (
    keras.utils.to_categorical(labels, 10) for labels in (y_train, y_test)
)

In [5]:
# Build the model.
#
# Reset the Keras graph/session first so that re-running this cell rebuilds
# the network in a fresh graph instead of adding a second copy of every layer
# to the existing one (which would change the variable names that the
# checkpoint-restore step matches against).
K.clear_session()

input_tensor = Input(shape=(28, 28, 1), name='input_tensor')

# First block: conv -> ReLU -> max-pool.
x = Conv2D(32, (3, 3), name='conv1')(input_tensor)
x = ReLU(name='relu1')(x)
x = MaxPooling2D(pool_size=(2, 2), name='maxpool1')(x)

# Second block.
x = Conv2D(64, (3, 3), name='conv2')(x)
x = ReLU(name='relu2')(x)
x = MaxPooling2D(pool_size=(2, 2), name='maxpool2')(x)

# Third block.
x = Conv2D(128, (3, 3), name='conv3')(x)
x = ReLU(name='relu3')(x)
x = MaxPooling2D(pool_size=(2, 2), name='maxpool3')(x)

# Classifier head.
x = Flatten(name='flatten')(x)
# Name this layer explicitly, like every other layer in the model. 'dense' is
# the name Keras auto-generates for the first Dense layer on a fresh kernel;
# pinning it keeps the variable names independent of the auto-naming counter.
x = Dense(128, name='dense')(x)
x = ReLU(name='reluout')(x)

# No activation is applied to the final layer.
output_tensor = Dense(10, name='output_tensor')(x)

model = Model(inputs=input_tensor, outputs=output_tensor)

# Session backing the Keras graph; later cells use it to rewrite the graph
# and to restore/save checkpoints.
sess = K.get_session()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [6]:
# Record which of the current global variables are stored in the pre-trained
# float checkpoint, in preparation for restoring the pre-trained weights.
# The graph rewrite in the next step adds its own variables to the
# global-variables collection, so this mapping has to be captured before the
# graph is rewritten.
per_trained_model_path = './models/float_point/model.ckpt'
reader = tf.train.NewCheckpointReader(per_trained_model_path)

# Map checkpoint tensor name (variable name without the ':<index>' suffix)
# to the variable, keeping only names the checkpoint actually contains.
restore_dict = {
    ckpt_name: variable
    for ckpt_name, variable in (
        (var.name.split(':')[0], var) for var in tf.global_variables()
    )
    if reader.has_tensor(ckpt_name)
}

In [7]:
# Rewrite the graph, adding fake-quantization ops to the training graph
# (8-bit weights and 8-bit activations).
quant_bits = {'weight_bits': 8, 'activation_bits': 8}
experimental_create_training_graph(input_graph=sess.graph, **quant_bits)

# Adding the fake-quantization ops adds many variables,
# so the variables have to be initialized.
for make_initializer in (tf.global_variables_initializer,
                         tf.local_variables_initializer):
    sess.run(make_initializer())

In [8]:
# Restore the variables that are contained in the pre-trained model.
saver = tf.train.Saver(restore_dict)
saver.restore(sess, per_trained_model_path)

# Check that the fake-quantization ops were added: print every graph node
# whose name contains one of the marker substrings below.
quant_markers = ('AssignMaxLast', 'AssignMinLast')
for node in sess.graph.as_graph_def().node:
    if any(marker in node.name for marker in quant_markers):
        print('node name: {}'.format(node.name))

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./models/float_point/model.ckpt
node name: conv1/weights_quant/AssignMinLast
node name: conv1/weights_quant/AssignMaxLast
node name: conv2/weights_quant/AssignMinLast
node name: conv2/weights_quant/AssignMaxLast
node name: conv3/weights_quant/AssignMinLast
node name: conv3/weights_quant/AssignMaxLast
node name: dense/weights_quant/AssignMinLast
node name: dense/weights_quant/AssignMaxLast
node name: output_tensor/weights_quant/AssignMinLast
node name: output_tensor/weights_quant/AssignMaxLast


In [9]:
# Compile the model. A smaller learning rate is usually used when starting
# from an already-trained floating-point model.
logits_loss = keras.losses.CategoricalCrossentropy(from_logits=True)
fine_tune_optimizer = keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    loss=logits_loss,
    optimizer=fine_tune_optimizer,
    metrics=['accuracy'],
)

In [10]:
# Evaluate the model with the fake-quantization ops added,
# before quantization-aware training starts.
score = model.evaluate(x_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

# Create a TensorBoard callback for inspecting the details of the rewritten
# model graph. For example, under the conv1 scope one can find the act_quant
# subgraph, which holds quantization information such as min and max.
tensorboard = TensorBoard(log_dir='logs')

Test loss: 2.8432933586120606
Test accuracy: 0.103


In [11]:
# Start quantization-aware training.
fit_options = dict(
    batch_size=128,
    epochs=3,
    verbose=1,
    validation_data=(x_test, y_test),
    callbacks=[tensorboard],
)
model.fit(x_train, y_train, **fit_options)

Train on 60000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f3ce047d7d0>

In [12]:
# Evaluate the model after quantization-aware training.
score = model.evaluate(x_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 0.1373714783065021
Test accuracy: 0.9562


In [13]:
# Save the quantization-aware trained model to a checkpoint file.
# tf.train.Saver.save fails when the parent directory of the checkpoint
# prefix does not exist, so create it first (a no-op if it is already there).
quant_ckpt_dir = './models/quant_aware_trained'
tf.io.gfile.makedirs(quant_ckpt_dir)

saver = tf.train.Saver()
saver.save(sess, quant_ckpt_dir + '/model.ckpt')

'./models/quant_aware_trained/model.ckpt'