In [1]:
from __future__ import division

import sys
import os
import time
import math
# import ipdb
from datetime import datetime
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops
os.environ['CUDA_VISIBLE_DEVICES']='0'

In [2]:
# Hyper-parameters and data paths, registered as TensorFlow flags so that the
# rest of the notebook can read them through FLAGS.<name>.
flags = tf.app.flags
FLAGS = flags.FLAGS

# Model and data
flags.DEFINE_integer('residual_net_n', 7, '')
flags.DEFINE_string('train_tf_path', './data/train.tf', '')
flags.DEFINE_string('val_tf_path', './data/test.tf', '')
flags.DEFINE_integer('train_batch_size', 128, '')
flags.DEFINE_integer('val_batch_size', 100, '')

# Optimisation and reporting schedule
flags.DEFINE_float('weight_decay', 0.0005, 'Weight decay')
flags.DEFINE_integer('summary_interval', 100, 'Interval for summary.')
flags.DEFINE_integer('val_interval', 1000, 'Interval for evaluation.')
flags.DEFINE_integer('max_steps', 80000, 'Maximum number of iterations.')
flags.DEFINE_integer('save_interval', 5000, '')

# Dummy flag; presumably absorbs the `-f <connection file>` argument that
# Jupyter passes to the kernel process -- TODO confirm.
flags.DEFINE_string('f', '', 'kernel')

In [3]:
def one_hot_embedding(label, n_classes):
  """
  Turn integer class labels into one-hot rows.

  Rows of an identity matrix are looked up with tf.gather; both the lookup
  table and the gather op are pinned to the CPU.
  Args:
    label: integer tensor of class indices [B]
    n_classes: int, number of classes
  Return:
    embedding: float32 tensor [B x n_classes]
  """
  with tf.device('/cpu:0'):
    identity_rows = tf.constant(np.eye(n_classes, dtype=np.float32))
    return tf.gather(identity_rows, label)

In [4]:
def conv2d(x, n_in, n_out, k, s, p='SAME', bias=False, scope='conv'):
  """
  Square 2-D convolution with an optional additive bias.
  Args:
    x: 4D input tensor (NHWC layout, as implied by the [1,s,s,1] strides)
    n_in: int, number of input channels
    n_out: int, number of output channels
    k: int, kernel height and width
    s: int, stride along height and width
    p: padding mode handed to tf.nn.conv2d
    bias: bool, whether to add a zero-initialised bias
    scope: string, variable scope
  Return:
    conv: the convolution output (plus bias when requested)
  """
  fan_in = k * k * n_in
  with tf.variable_scope(scope):
    # Truncated-normal initial weights with stddev sqrt(2 / (k*k*n_in)).
    initial_kernel = tf.truncated_normal(
      [k, k, n_in, n_out], stddev=math.sqrt(2 / fan_in))
    kernel = tf.Variable(initial_kernel, name='weight')
    # Members of 'weights' receive L2 weight decay in _loss.
    tf.add_to_collection('weights', kernel)
    out = tf.nn.conv2d(x, kernel, [1, s, s, 1], padding=p)
    if not bias:
      return out
    bias_var = tf.get_variable(
      'bias', [n_out], initializer=tf.constant_initializer(0.0))
    tf.add_to_collection('biases', bias_var)
    return tf.nn.bias_add(out, bias_var)

In [5]:
def batch_norm(x, n_out, phase_train, scope='bn', affine=True):
  """
  Batch normalization on convolutional maps.

  In the training phase the statistics of the current batch are used and an
  exponential moving average of them is updated; otherwise the moving
  averages are used.
  Args:
    x: Tensor, 4D BHWD input maps
    n_out: integer, depth of input maps
    phase_train: boolean scalar tensor used as the tf.cond predicate
      (a tf.placeholder in this notebook); true indicates training phase
    scope: string, variable scope
    affine: whether to affine-transform outputs
  Return:
    normed: batch-normalized maps
  """
  with tf.variable_scope(scope):
    # Offset (beta) is always trainable; scale (gamma) only when affine=True.
    beta = tf.Variable(tf.constant(0.0, shape=[n_out]),
      name='beta', trainable=True)
    gamma = tf.Variable(tf.constant(1.0, shape=[n_out]),
      name='gamma', trainable=affine)
    tf.add_to_collection('biases', beta)
    # gamma joins 'weights', so it is included in the L2 decay term of _loss.
    tf.add_to_collection('weights', gamma)

    # Per-channel statistics: reduce over batch, height and width.
    batch_mean, batch_var = tf.nn.moments(x, [0,1,2], name='moments')
    ema = tf.train.ExponentialMovingAverage(decay=0.99)

    def mean_var_with_update():
      # Update the moving averages first, then hand back the batch statistics;
      # the control dependency forces the update to run whenever they are used.
      ema_apply_op = ema.apply([batch_mean, batch_var])
      with tf.control_dependencies([ema_apply_op]):
        return tf.identity(batch_mean), tf.identity(batch_var)
    # Training: batch statistics (+ EMA update). Otherwise: stored averages.
    mean, var = control_flow_ops.cond(phase_train,
      mean_var_with_update,
      lambda: (ema.average(batch_mean), ema.average(batch_var)))

    # 1e-3 is the variance epsilon; the last argument enables scaling by gamma.
    normed = tf.nn.batch_norm_with_global_normalization(x, mean, var, 
      beta, gamma, 1e-3, affine)
  return normed

In [6]:
def residual_block(x, n_in, n_out, subsample, phase_train, scope='res_block'):
  """
  Two 3x3 conv + batch-norm layers with a shortcut connection.

  When `subsample` is true the first convolution uses stride 2 and the
  shortcut is a stride-2 3x3 convolution; otherwise the shortcut is the
  identity (which requires n_in == n_out for the addition to be valid).
  Args:
    x: 4D input maps
    n_in: int, input depth
    n_out: int, output depth
    subsample: bool, halve the spatial resolution in this block
    phase_train: boolean scalar tensor, forwarded to batch_norm
    scope: string, variable scope
  Return:
    y: output maps after the final ReLU
  """
  first_stride = 2 if subsample else 1
  with tf.variable_scope(scope):
    y = conv2d(x, n_in, n_out, 3, first_stride, 'SAME', False, scope='conv_1')
    if subsample:
      shortcut = conv2d(x, n_in, n_out, 3, 2, 'SAME', False, scope='shortcut')
    else:
      shortcut = tf.identity(x, name='shortcut')

    y = tf.nn.relu(batch_norm(y, n_out, phase_train, scope='bn_1'), name='relu_1')
    # The second convolution carries a bias (bias=True).
    y = conv2d(y, n_out, n_out, 3, 1, 'SAME', True, scope='conv_2')
    y = batch_norm(y, n_out, phase_train, scope='bn_2')
    return tf.nn.relu(y + shortcut, name='relu_2')

In [7]:
def residual_group(x, n_in, n_out, n, first_subsample, phase_train, scope='res_group'):
  """
  Stack of `n` residual blocks named block_1 .. block_n.

  Only the first block changes depth (n_in -> n_out) and, when
  `first_subsample` is true, the spatial resolution.
  """
  with tf.variable_scope(scope):
    y = residual_block(x, n_in, n_out, first_subsample, phase_train, scope='block_1')
    for block_idx in range(2, n + 1):
      y = residual_block(y, n_out, n_out, False, phase_train,
                         scope='block_%d' % block_idx)
  return y

In [8]:
def residual_net(x, n, n_classes, phase_train, scope='res_net'):
  """
  Residual network: initial conv, three residual groups, pooling, linear layer.
  Args:
    x: 4D image batch with 3 channels
    n: int, number of residual blocks per group
    n_classes: int, number of output logits
    phase_train: boolean scalar tensor, forwarded to every batch_norm
    scope: string, variable scope
  Return:
    y: logits [B x n_classes]
  """
  # (scope, input depth, output depth, subsample in first block)
  group_specs = [
    ('group_1', 16, 16, False),
    ('group_2', 16, 32, True),
    ('group_3', 32, 64, True),
  ]
  with tf.variable_scope(scope):
    y = conv2d(x, 3, 16, 3, 1, 'SAME', False, scope='conv_init')
    y = batch_norm(y, 16, phase_train, scope='bn_init')
    y = tf.nn.relu(y, name='relu_init')
    for group_scope, depth_in, depth_out, subsample in group_specs:
      y = residual_group(y, depth_in, depth_out, n, subsample, phase_train,
                         scope=group_scope)

    # 8x8 VALID pooling; presumably the maps are 8x8 here (32x32 input
    # halved twice), which makes this a global average -- TODO confirm.
    y = tf.nn.avg_pool(y, [1, 8, 8, 1], [1, 1, 1, 1], 'VALID', name='avg_pool')
    y = tf.reshape(y, [-1, 64])

    # Final linear classifier; its weight joins the weight-decay collection.
    w = tf.get_variable(name='weight_fc', shape=[64, n_classes],
                        initializer=tf.contrib.layers.xavier_initializer_conv2d())
    tf.add_to_collection('weights', w)
    b = tf.get_variable(name='weight_biase', shape=[n_classes],
                        initializer=tf.constant_initializer(0))
    tf.add_to_collection('last_biases', b)
    return tf.matmul(y, w) + b


In [9]:
def _loss(logits, labels, scope='loss'):
  """
  Total training loss: mean softmax cross-entropy plus L2 weight decay.

  Both terms are appended to the 'losses' collection and the returned tensor
  is the sum of everything in that collection.
  Args:
    logits: [B x 10] class scores
    labels: integer class indices [B]
    scope: string, variable scope
  Return:
    scalar tensor named 'total_loss'
  """
  with tf.variable_scope(scope):
    # Cross-entropy against one-hot targets (10 classes, hard-coded).
    one_hot_targets = one_hot_embedding(labels, 10)
    per_example = tf.nn.softmax_cross_entropy_with_logits(
      logits=logits, labels=one_hot_targets)
    entropy_loss = tf.reduce_mean(per_example, name='entropy_loss')
    tf.add_to_collection('losses', entropy_loss)

    # L2 penalty over every tensor registered in the 'weights' collection.
    l2_terms = []
    for weight in tf.get_collection('weights'):
      l2_terms.append(tf.nn.l2_loss(weight))
    weight_decay_loss = FLAGS.weight_decay * tf.add_n(l2_terms)
    tf.add_to_collection('losses', weight_decay_loss)

  return tf.add_n(tf.get_collection('losses'), name='total_loss')

In [10]:
def _accuracy(logits, gt_label, scope='accuracy'):
  """
  Fraction of examples whose arg-max logit equals the ground-truth label.
  """
  with tf.variable_scope(scope):
    predicted = tf.argmax(logits, 1)
    # 1 where the prediction is right, 0 where it is wrong.
    hits = tf.cast(tf.equal(predicted, gt_label), tf.int32)
    miss_fraction = tf.nn.zero_fraction(hits)
    return 1.0 - miss_fraction

In [11]:
def _train_op(loss, global_step, learning_rate):
  """
  Build a momentum-SGD training op over all trainable variables.
  Args:
    loss: scalar tensor to minimise
    global_step: step-counter variable (or None); incremented once per update
    learning_rate: scalar learning rate (tensor or float)
  Return:
    train_op: no-op that depends on the parameter update
  """
  params = tf.trainable_variables()
  gradients = tf.gradients(loss, params, name='gradients')
  optim = tf.train.MomentumOptimizer(learning_rate, 0.9)
  # global_step was previously accepted but ignored, so the counter never
  # advanced; handing it to apply_gradients makes the optimizer increment it.
  update = optim.apply_gradients(list(zip(gradients, params)),
                                 global_step=global_step)
  with tf.control_dependencies([update]):
    train_op = tf.no_op(name='train_op')
  return train_op

In [12]:
def cifar10_input_stream(records_path):
  """
  Read single (image, label) examples from a TFRecord file via an input queue.
  Args:
    records_path: path of the TFRecord file
  Return:
    image: float32 tensor [32, 32, 3]
    label: int64 scalar tensor
  """
  reader = tf.TFRecordReader()
  filename_queue = tf.train.string_input_producer([records_path], num_epochs=None)
  _, serialized = reader.read(filename_queue)

  # Each record holds the raw uint8 pixel bytes and an integer label.
  schema = {
    'image_raw': tf.FixedLenFeature([], tf.string),
    'label': tf.FixedLenFeature([], tf.int64),
  }
  parsed = tf.parse_single_example(serialized, schema)

  pixels = tf.decode_raw(parsed['image_raw'], tf.uint8)
  image = tf.cast(tf.reshape(pixels, [32, 32, 3]), tf.float32)
  label = tf.cast(parsed['label'], tf.int64)
  return image, label

In [13]:
def normalize_image(image):
  """
  Standardise an image per channel: (image - mean) / std.

  The three-element mean and std are hard-coded below and broadcast over the
  last (channel) axis of `image`.
  """
  channel_mean = [125.30690002, 122.95014954, 113.86599731]
  channel_std = [62.9932518, 62.08860397, 66.70500946]
  return (image - channel_mean) / channel_std

In [14]:
def random_distort_image(image):
  """
  Training-time augmentation: zero-pad 4 pixels on every side (32 -> 40),
  take a random 32x32x3 crop, then apply a random horizontal flip.
  """
  padded = tf.image.pad_to_bounding_box(image, 4, 4, 40, 40)
  cropped = tf.random_crop(padded, [32, 32, 3])
  return tf.image.random_flip_left_right(cropped)


In [15]:
def make_train_batch(train_records_path, batch_size):
  """
  Build the shuffled, augmented training batch pipeline on the CPU.
  Args:
    train_records_path: path of the training TFRecord file
    batch_size: int, examples per batch
  Return:
    (image_batch, label_batch) tensors
  """
  with tf.variable_scope('train_batch'), tf.device('/cpu:0'):
    image, label = cifar10_input_stream(train_records_path)
    # Normalise first, then augment (so the padding is zero in normalised space).
    image = random_distort_image(normalize_image(image))
    image_batch, label_batch = tf.train.shuffle_batch(
      [image, label],
      batch_size=batch_size,
      num_threads=4,
      capacity=50000,
      min_after_dequeue=1000)
  return image_batch, label_batch

In [16]:
def make_validation_batch(test_records_path, batch_size):
  """
  Build the validation batch pipeline on the CPU: normalised images only,
  no augmentation, no shuffling, a single reader thread.
  Args:
    test_records_path: path of the validation TFRecord file
    batch_size: int, examples per batch
  Return:
    (image_batch, label_batch) tensors
  """
  with tf.variable_scope('evaluate_batch'), tf.device('/cpu:0'):
    image, label = cifar10_input_stream(test_records_path)
    image = normalize_image(image)
    image_batch, label_batch = tf.train.batch(
      [image, label],
      batch_size=batch_size,
      num_threads=1,
      capacity=10000)
  return image_batch, label_batch

In [17]:
# Build the whole inference/loss graph once. Every name defined here is a
# notebook global used by the evaluation and training cells further down.

# Fed on every sess.run: True selects the training pipeline and batch-statistics
# batch norm, False selects the validation pipeline and moving averages.
phase_train = tf.placeholder(tf.bool, name='phase_train')
# Fed by the training loops so the step size can change without rebuilding.
learning_rate = tf.placeholder(tf.float32, name='learning_rate')

# Not referenced by the training cells visible in this notebook.
global_step = tf.Variable(0, trainable=False, name='global_step')


train_image_batch, train_label_batch = make_train_batch(FLAGS.train_tf_path, FLAGS.train_batch_size)
val_image_batch, val_label_batch = make_validation_batch(FLAGS.val_tf_path, FLAGS.val_batch_size)

# Switch between the two input pipelines at run time.
# NOTE(review): the batch tensors are created outside the cond branches, so
# presumably both queues are dequeued on every run regardless of phase_train
# -- confirm against the tf.cond documentation.
image_batch, label_batch = control_flow_ops.cond(phase_train,lambda: (train_image_batch, train_label_batch),lambda: (val_image_batch, val_label_batch))


# 10 output classes; network depth is controlled by FLAGS.residual_net_n.
logits = residual_net(image_batch, FLAGS.residual_net_n, 10, phase_train)


loss = _loss(logits, label_batch)
accuracy = _accuracy(logits, label_batch)

# train one step


Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [18]:
# Create the session and load the pretrained full-precision weights.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
print('Initializing...')
# The Saver is created after the model graph, so it covers the variables
# defined so far; no initializer is run -- all values come from the checkpoint.
saver = tf.train.Saver()
saver.restore(sess,'./full_precision/res44/model/res.ckpt')

# Start the background threads that fill the input queues; without them any
# sess.run that needs a batch would block. The returned thread list is what
# the cell displays as its output.
tf.train.start_queue_runners(sess=sess)

Initializing...
INFO:tensorflow:Restoring parameters from ./full_precision/res44/model/res.ckpt


[<Thread(QueueRunnerThread-train_batch/input_producer-train_batch/input_producer/input_producer_EnqueueMany, started daemon 140086708520704)>,
 <Thread(QueueRunnerThread-train_batch/shuffle_batch/random_shuffle_queue-train_batch/shuffle_batch/random_shuffle_queue_enqueue, started daemon 140086716913408)>,
 <Thread(QueueRunnerThread-train_batch/shuffle_batch/random_shuffle_queue-train_batch/shuffle_batch/random_shuffle_queue_enqueue, started daemon 140086725306112)>,
 <Thread(QueueRunnerThread-train_batch/shuffle_batch/random_shuffle_queue-train_batch/shuffle_batch/random_shuffle_queue_enqueue, started daemon 140086733698816)>,
 <Thread(QueueRunnerThread-train_batch/shuffle_batch/random_shuffle_queue-train_batch/shuffle_batch/random_shuffle_queue_enqueue, started daemon 140086700128000)>,
 <Thread(QueueRunnerThread-evaluate_batch/input_producer-evaluate_batch/input_producer/input_producer_EnqueueMany, started daemon 140086691735296)>,
 <Thread(QueueRunnerThread-evaluate_batch/batch/fifo

In [19]:

# validation

# Baseline evaluation of the restored full-precision model: run the
# validation pipeline for n_val_samples examples and report top-1 accuracy.
print('Evaluating...')
n_val_samples = 10000
val_batch_size = FLAGS.val_batch_size
n_val_batch = int(n_val_samples / val_batch_size)
val_logits = np.zeros((n_val_samples, 10), dtype=np.float32)
val_labels = np.zeros((n_val_samples), dtype=np.int64)
val_losses = []
eval_fetches = [logits, label_batch, loss]
for batch_idx in range(n_val_batch):
  lo = batch_idx * val_batch_size
  hi = lo + val_batch_size
  # phase_train=False selects the validation queue and moving-average batch norm.
  batch_logits, batch_labels, batch_loss = sess.run(
    eval_fetches, {phase_train.name: False})
  val_logits[lo:hi, :] = batch_logits
  val_labels[lo:hi] = batch_labels
  val_losses.append(batch_loss)
pred_labels = np.argmax(val_logits, axis=1)
n_correct = np.count_nonzero(pred_labels == val_labels)
val_accuracy = n_correct / n_val_samples
val_loss = float(np.mean(np.asarray(val_losses)))
print('Test accuracy = %f' % val_accuracy)

Evaluating...
Test accuracy = 0.933200


In [20]:
def apply_prune_on_grads(grads_and_vars, dict_nzidx):
    """
    Mask gradients so that frozen (already quantized) weights stay fixed.

    For every (grad, var) pair whose variable name is a key of `dict_nzidx`,
    the gradient is multiplied element-wise by that mask (1 = still trainable,
    0 = frozen). Uses the notebook-global `sess` to evaluate each mask and
    embeds the result in the graph as a constant.
    Args:
        grads_and_vars: list of (gradient, variable) pairs, e.g. from
            Optimizer.compute_gradients; modified in place
        dict_nzidx: dict mapping variable name -> mask tensor of the
            variable's shape
    Return:
        the same list object, with masked gradients
    """
    # Single pass with a dict lookup instead of scanning the whole list once
    # per mask key.
    for idx, (grad, var) in enumerate(grads_and_vars):
        mask = dict_nzidx.get(var.name)
        # Skip variables without a mask, and variables the loss does not
        # depend on (their gradient is None and cannot be multiplied).
        if mask is None or grad is None:
            continue
        mask_const = tf.cast(tf.constant(sess.run(mask)), tf.float32)
        grads_and_vars[idx] = (tf.multiply(mask_const, grad), var)
    return grads_and_vars

def apply_inq(weights, inq_dict, var_name, prune_rate, bit=8):
    """
    Quantize the largest-magnitude fraction of each variable to powers of two.

    For every name in `var_name` the `prune_rate` fraction of entries with the
    largest absolute value is selected. Selected entries below the lower bound
    become 0, the others are replaced by sign(w) * 2**floor(log2(|w| * 4/3)).
    The quantized values are assigned back to the variable and the matching
    mask entries are set to 0 (frozen). Uses the notebook-global `sess`.
    Args:
        weights: dict mapping variable name -> tf.Variable
        inq_dict: dict mapping variable name -> mask tensor (1 = trainable);
            updated in place
        var_name: iterable of variable names to process
        prune_rate: float in [0, 1], fraction of entries to quantize
        bit: controls the lower bound 2**(n1 + 1 - bit/4); default 8 keeps
            the previous hard-coded value
    Return:
        inq_dict, with the updated masks
    """
    for wl in var_name:
        weight_var = weights[wl]
        weight_arr = sess.run(weight_var)
        # Evaluate the mask once and reuse both its values and its shape.
        mask_arr = sess.run(inq_dict[wl])

        flat_weights = np.reshape(weight_arr, [-1]).copy()
        flat_mask = np.reshape(mask_arr, [-1]).copy()
        if flat_weights.size == 0:
            continue  # nothing to quantize; max() of an empty array would raise

        # Indices ordered by descending magnitude; the first num_prune are quantized.
        order = np.flip(np.argsort(np.abs(flat_weights)), 0)
        num_prune = int(len(flat_weights) * prune_rate)
        selected = order[:num_prune]
        to_quantize = flat_weights[selected]

        max_abs = np.max(np.abs(flat_weights))
        if max_abs > 0:  # an all-zero tensor is already quantized
            n1 = np.floor(np.log2(max_abs * 4 / 3))
            # NOTE(review): the INQ paper defines n2 = n1 + 1 - 2**(b-1)/2.
            # With `bit / 4` and bit=8 the bound is 2**(n1-1), which leaves
            # only two non-zero magnitudes (2**n1 and 2**(n1-1)) -- confirm
            # this is intended.
            lower_bound = 2 ** (n1 + 1 - bit / 4)

            to_quantize[np.abs(to_quantize) < lower_bound] = 0
            nonzero = to_quantize != 0
            to_quantize[nonzero] = (
                2 ** (np.floor(np.log2(np.abs(to_quantize[nonzero] * 4 / 3))))
                * np.sign(to_quantize[nonzero]))

        flat_weights[selected] = to_quantize
        flat_mask[selected] = 0  # quantized entries are frozen from now on

        inq_dict[wl] = tf.cast(np.reshape(flat_mask, np.shape(mask_arr)), tf.float32)
        sess.run(weight_var.assign(np.reshape(flat_weights, np.shape(weight_arr))))
    return inq_dict

# 开始压缩

第一轮  量化
prune_rate =0.5

In [21]:
# Validation accuracy before the first quantization round
# (the printed label means "accuracy before pruning").
print('prune前的准确率')
n_val_samples = 10000
val_batch_size = FLAGS.val_batch_size
n_val_batch = int(n_val_samples / val_batch_size)
val_logits = np.zeros((n_val_samples, 10), dtype=np.float32)
val_labels = np.zeros((n_val_samples), dtype=np.int64)
val_losses = []
for i in range(n_val_batch):
    fetches = [logits, label_batch, loss]
    session_outputs = sess.run(fetches, {phase_train.name: False})
    val_logits[i*val_batch_size:(i+1)*val_batch_size, :] = session_outputs[0]
    val_labels[i*val_batch_size:(i+1)*val_batch_size] = session_outputs[1]
    # Inside the loop so every batch contributes to val_loss; it used to sit
    # after the loop and recorded only the last batch.
    val_losses.append(session_outputs[2])
pred_labels = np.argmax(val_logits, axis=1)
val_accuracy = np.count_nonzero(pred_labels == val_labels) / n_val_samples
val_loss = float(np.mean(np.asarray(val_losses)))
print('Test accuracy = %f' % val_accuracy)

prune前的准确率
Test accuracy = 0.933200


In [22]:
# Show the current values of the classifier bias (before any quantization).
# The label mentions conv_last, but the only member of the 'last_biases'
# collection is the fully-connected bias created in residual_net.
last_bias = tf.get_collection('last_biases')[0]
print('prune_rate = 0时，可视化部分参数：res_net/conv_last/bias:0')
print(sess.run(last_bias))

prune_rate = 0时，可视化部分参数：res_net/conv_last/bias:0
[ 0.08215426 -0.05700303  0.04312498  0.1090302  -0.0417071  -0.07600322
  0.00174217 -0.00667335  0.03555666 -0.09022148]


In [23]:
# Round 1: quantize the largest 50% of every trainable variable, then build a
# plain-SGD train op whose gradients are masked for the frozen entries.
trainable_vars = tf.trainable_variables()
para_dict = {v.name: v for v in trainable_vars}
# Masks start from all-ones, i.e. everything is considered trainable.
one_dict = {v.name: tf.ones_like(v) for v in trainable_vars}
var_name = [v.name for v in trainable_vars]

prune_dict = apply_inq(para_dict, one_dict, var_name, 0.5)

trainer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = apply_prune_on_grads(trainer.compute_gradients(loss), prune_dict)
train_step = trainer.apply_gradients(grads_and_vars)

In [24]:
# Fine-tune the still-trainable weights after round 1.
curr_lr = 0
for step in range(40000):
  # Step-wise schedule; with 40000 steps the 1e-3 branch is never reached.
  _lr = 1e-2 if step <= 48000 else 1e-3
  if curr_lr != _lr:
    curr_lr = _lr
    print('Learning rate set to %f' % curr_lr)

  # train
  log_now = step > 0 and step % FLAGS.summary_interval == 0
  fetches = [train_step, loss, accuracy] if log_now else [train_step, loss]
  sess_outputs = sess.run(fetches, {phase_train.name: True, learning_rate.name: curr_lr})

  if log_now:
    train_loss_value, train_acc_value = sess_outputs[1:]
    print('[%s] Iteration %d, train loss = %f, train accuracy = %f' %
        (datetime.now(), step, train_loss_value, train_acc_value))

  # validation
  if step > 0 and step % FLAGS.val_interval == 0:
    print('Evaluating...')
    n_val_samples = 10000
    val_batch_size = FLAGS.val_batch_size
    n_val_batch = int(n_val_samples / val_batch_size)
    val_logits = np.zeros((n_val_samples, 10), dtype=np.float32)
    val_labels = np.zeros((n_val_samples), dtype=np.int64)
    val_losses = []
    eval_fetches = [logits, label_batch, loss]
    for batch_idx in range(n_val_batch):
      lo = batch_idx * val_batch_size
      hi = lo + val_batch_size
      batch_logits, batch_labels, batch_loss = sess.run(
        eval_fetches, {phase_train.name: False})
      val_logits[lo:hi, :] = batch_logits
      val_labels[lo:hi] = batch_labels
      val_losses.append(batch_loss)
    pred_labels = np.argmax(val_logits, axis=1)
    val_accuracy = np.count_nonzero(pred_labels == val_labels) / n_val_samples
    val_loss = float(np.mean(np.asarray(val_losses)))
    print('Test accuracy = %f' % val_accuracy)

Learning rate set to 0.010000
[2018-07-17 16:56:12.172671] Iteration 100, train loss = 1.903270, train accuracy = 0.343750
[2018-07-17 16:56:20.445568] Iteration 200, train loss = 2.097512, train accuracy = 0.218750
[2018-07-17 16:56:28.396475] Iteration 300, train loss = 1.765162, train accuracy = 0.312500
[2018-07-17 16:56:36.242398] Iteration 400, train loss = 1.566771, train accuracy = 0.382812
[2018-07-17 16:56:44.054265] Iteration 500, train loss = 1.650104, train accuracy = 0.343750
[2018-07-17 16:56:51.855389] Iteration 600, train loss = 1.318387, train accuracy = 0.476562
[2018-07-17 16:56:59.693854] Iteration 700, train loss = 1.478517, train accuracy = 0.453125
[2018-07-17 16:57:07.573966] Iteration 800, train loss = 1.137687, train accuracy = 0.625000
[2018-07-17 16:57:15.469568] Iteration 900, train loss = 1.280052, train accuracy = 0.531250
[2018-07-17 16:57:23.313343] Iteration 1000, train loss = 1.194305, train accuracy = 0.609375
Evaluating...
Test accuracy = 0.505000


第二轮 量化
prune_rate = 0.75

In [25]:
# Validation accuracy before the second quantization round
# (the printed label means "accuracy before pruning").
print('prune前的准确率')
n_val_samples = 10000
val_batch_size = FLAGS.val_batch_size
n_val_batch = int(n_val_samples / val_batch_size)
val_logits = np.zeros((n_val_samples, 10), dtype=np.float32)
val_labels = np.zeros((n_val_samples), dtype=np.int64)
val_losses = []
for i in range(n_val_batch):
    fetches = [logits, label_batch, loss]
    session_outputs = sess.run(fetches, {phase_train.name: False})
    val_logits[i*val_batch_size:(i+1)*val_batch_size, :] = session_outputs[0]
    val_labels[i*val_batch_size:(i+1)*val_batch_size] = session_outputs[1]
    # Inside the loop so every batch contributes to val_loss; it used to sit
    # after the loop and recorded only the last batch.
    val_losses.append(session_outputs[2])
pred_labels = np.argmax(val_logits, axis=1)
val_accuracy = np.count_nonzero(pred_labels == val_labels) / n_val_samples
val_loss = float(np.mean(np.asarray(val_losses)))
print('Test accuracy = %f' % val_accuracy)

prune前的准确率
Test accuracy = 0.864800


In [26]:
# Show the classifier bias after the 0.5 round.
# The label mentions conv_last, but the only member of the 'last_biases'
# collection is the fully-connected bias created in residual_net.
last_bias = tf.get_collection('last_biases')[0]
print('prune_rate =0.5时，可视化部分参数：res_net/conv_last/bias:0')
print(sess.run(last_bias))

prune_rate =0.5时，可视化部分参数：res_net/conv_last/bias:0
[ 0.0625      0.          0.03413247  0.125       0.00889578 -0.0625
 -0.00152558 -0.09894396 -0.19610858 -0.0625    ]


In [27]:
# Round 2: quantize the largest 75% of every trainable variable, then build a
# fresh plain-SGD train op with masked gradients.
trainable_vars = tf.trainable_variables()
para_dict = {v.name: v for v in trainable_vars}
# Masks are rebuilt from all-ones, so the frozen set is recomputed from the
# current weight magnitudes rather than carried over from the previous round.
one_dict = {v.name: tf.ones_like(v) for v in trainable_vars}
var_name = [v.name for v in trainable_vars]

prune_dict = apply_inq(para_dict, one_dict, var_name, 0.75)

trainer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = apply_prune_on_grads(trainer.compute_gradients(loss), prune_dict)
train_step = trainer.apply_gradients(grads_and_vars)

In [28]:
# Fine-tune the still-trainable weights after round 2.
# curr_lr is not reset here; it carries over from the previous training cell.
for step in range(FLAGS.max_steps):
  # Step-wise schedule: 1e-2 up to and including step 48000, then 1e-3.
  _lr = 1e-2 if step <= 48000 else 1e-3
  if curr_lr != _lr:
    curr_lr = _lr
    print('Learning rate set to %f' % curr_lr)

  # train
  log_now = step > 0 and step % FLAGS.summary_interval == 0
  fetches = [train_step, loss, accuracy] if log_now else [train_step, loss]
  sess_outputs = sess.run(fetches, {phase_train.name: True, learning_rate.name: curr_lr})

  if log_now:
    train_loss_value, train_acc_value = sess_outputs[1:]
    print('[%s] Iteration %d, train loss = %f, train accuracy = %f' %
        (datetime.now(), step, train_loss_value, train_acc_value))

  # validation
  if step > 0 and step % FLAGS.val_interval == 0:
    print('Evaluating...')
    n_val_samples = 10000
    val_batch_size = FLAGS.val_batch_size
    n_val_batch = int(n_val_samples / val_batch_size)
    val_logits = np.zeros((n_val_samples, 10), dtype=np.float32)
    val_labels = np.zeros((n_val_samples), dtype=np.int64)
    val_losses = []
    eval_fetches = [logits, label_batch, loss]
    for batch_idx in range(n_val_batch):
      lo = batch_idx * val_batch_size
      hi = lo + val_batch_size
      batch_logits, batch_labels, batch_loss = sess.run(
        eval_fetches, {phase_train.name: False})
      val_logits[lo:hi, :] = batch_logits
      val_labels[lo:hi] = batch_labels
      val_losses.append(batch_loss)
    pred_labels = np.argmax(val_logits, axis=1)
    val_accuracy = np.count_nonzero(pred_labels == val_labels) / n_val_samples
    val_loss = float(np.mean(np.asarray(val_losses)))
    print('Test accuracy = %f' % val_accuracy)

[2018-07-17 17:57:30.721648] Iteration 100, train loss = 1.534838, train accuracy = 0.515625
[2018-07-17 17:57:38.697495] Iteration 200, train loss = 1.539278, train accuracy = 0.468750
[2018-07-17 17:57:46.731930] Iteration 300, train loss = 1.265530, train accuracy = 0.570312
[2018-07-17 17:57:54.865035] Iteration 400, train loss = 1.305935, train accuracy = 0.578125
[2018-07-17 17:58:03.123215] Iteration 500, train loss = 1.102514, train accuracy = 0.617188
[2018-07-17 17:58:11.333917] Iteration 600, train loss = 1.098331, train accuracy = 0.625000
[2018-07-17 17:58:19.557904] Iteration 700, train loss = 0.996875, train accuracy = 0.656250
[2018-07-17 17:58:28.149973] Iteration 800, train loss = 0.958525, train accuracy = 0.695312
[2018-07-17 17:58:36.782508] Iteration 900, train loss = 0.966294, train accuracy = 0.664062
[2018-07-17 17:58:45.381854] Iteration 1000, train loss = 0.776158, train accuracy = 0.765625
Evaluating...
Test accuracy = 0.660400
[2018-07-17 17:58:56.679543] I

第三轮  量化
prune_rate = 0.85

In [29]:
# Validation accuracy before the third quantization round
# (the printed label means "accuracy before pruning").
print('prune前的准确率')
n_val_samples = 10000
val_batch_size = FLAGS.val_batch_size
n_val_batch = int(n_val_samples / val_batch_size)
val_logits = np.zeros((n_val_samples, 10), dtype=np.float32)
val_labels = np.zeros((n_val_samples), dtype=np.int64)
val_losses = []
for i in range(n_val_batch):
    fetches = [logits, label_batch, loss]
    session_outputs = sess.run(fetches, {phase_train.name: False})
    val_logits[i*val_batch_size:(i+1)*val_batch_size, :] = session_outputs[0]
    val_labels[i*val_batch_size:(i+1)*val_batch_size] = session_outputs[1]
    # Inside the loop so every batch contributes to val_loss; it used to sit
    # after the loop and recorded only the last batch.
    val_losses.append(session_outputs[2])
pred_labels = np.argmax(val_logits, axis=1)
val_accuracy = np.count_nonzero(pred_labels == val_labels) / n_val_samples
val_loss = float(np.mean(np.asarray(val_losses)))
print('Test accuracy = %f' % val_accuracy)

prune前的准确率
Test accuracy = 0.889400


In [30]:
# Show the classifier bias after the 0.75 round.
# The label mentions conv_last, but the only member of the 'last_biases'
# collection is the fully-connected bias created in residual_net.
last_bias = tf.get_collection('last_biases')[0]
print('prune_rate =0.75时，可视化部分参数：res_net/conv_last/bias:0')
print(sess.run(last_bias))

prune_rate =0.75时，可视化部分参数：res_net/conv_last/bias:0
[ 0.         -0.08464392  0.          0.125       0.06231114  0.
  0.11832674  0.         -0.25        0.        ]


In [31]:
# Round 3: quantize the largest 85% of every trainable variable, then build a
# fresh plain-SGD train op with masked gradients.
trainable_vars = tf.trainable_variables()
para_dict = {v.name: v for v in trainable_vars}
# Masks are rebuilt from all-ones, so the frozen set is recomputed from the
# current weight magnitudes rather than carried over from the previous round.
one_dict = {v.name: tf.ones_like(v) for v in trainable_vars}
var_name = [v.name for v in trainable_vars]

prune_dict = apply_inq(para_dict, one_dict, var_name, 0.85)

trainer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = apply_prune_on_grads(trainer.compute_gradients(loss), prune_dict)
train_step = trainer.apply_gradients(grads_and_vars)

In [32]:
# Fine-tune the still-trainable weights after round 3.
# curr_lr is not reset here; it carries over from the previous training cell.
for step in range(FLAGS.max_steps):
  # Step-wise schedule: 1e-2 up to and including step 48000, then 1e-3.
  _lr = 1e-2 if step <= 48000 else 1e-3
  if curr_lr != _lr:
    curr_lr = _lr
    print('Learning rate set to %f' % curr_lr)

  # train
  log_now = step > 0 and step % FLAGS.summary_interval == 0
  fetches = [train_step, loss, accuracy] if log_now else [train_step, loss]
  sess_outputs = sess.run(fetches, {phase_train.name: True, learning_rate.name: curr_lr})

  if log_now:
    train_loss_value, train_acc_value = sess_outputs[1:]
    print('[%s] Iteration %d, train loss = %f, train accuracy = %f' %
        (datetime.now(), step, train_loss_value, train_acc_value))

  # validation
  if step > 0 and step % FLAGS.val_interval == 0:
    print('Evaluating...')
    n_val_samples = 10000
    val_batch_size = FLAGS.val_batch_size
    n_val_batch = int(n_val_samples / val_batch_size)
    val_logits = np.zeros((n_val_samples, 10), dtype=np.float32)
    val_labels = np.zeros((n_val_samples), dtype=np.int64)
    val_losses = []
    eval_fetches = [logits, label_batch, loss]
    for batch_idx in range(n_val_batch):
      lo = batch_idx * val_batch_size
      hi = lo + val_batch_size
      batch_logits, batch_labels, batch_loss = sess.run(
        eval_fetches, {phase_train.name: False})
      val_logits[lo:hi, :] = batch_logits
      val_labels[lo:hi] = batch_labels
      val_losses.append(batch_loss)
    pred_labels = np.argmax(val_logits, axis=1)
    val_accuracy = np.count_nonzero(pred_labels == val_labels) / n_val_samples
    val_loss = float(np.mean(np.asarray(val_losses)))
    print('Test accuracy = %f' % val_accuracy)

Learning rate set to 0.010000
[2018-07-17 19:59:55.582136] Iteration 100, train loss = 1.320621, train accuracy = 0.546875
[2018-07-17 20:00:03.454748] Iteration 200, train loss = 1.184263, train accuracy = 0.617188
[2018-07-17 20:00:11.339847] Iteration 300, train loss = 1.021544, train accuracy = 0.703125
[2018-07-17 20:00:19.245027] Iteration 400, train loss = 0.926669, train accuracy = 0.718750
[2018-07-17 20:00:27.251195] Iteration 500, train loss = 0.854170, train accuracy = 0.703125
[2018-07-17 20:00:35.437961] Iteration 600, train loss = 0.814507, train accuracy = 0.734375
[2018-07-17 20:00:43.708402] Iteration 700, train loss = 0.918158, train accuracy = 0.687500
[2018-07-17 20:00:51.881003] Iteration 800, train loss = 0.748516, train accuracy = 0.742188
[2018-07-17 20:01:00.333449] Iteration 900, train loss = 0.722276, train accuracy = 0.781250
[2018-07-17 20:01:08.951004] Iteration 1000, train loss = 0.777127, train accuracy = 0.757812
Evaluating...
Test accuracy = 0.706600


第四轮  量化  
prune_rate = 1.00

In [33]:
# Validation accuracy before the fourth quantization round
# (the printed label means "accuracy before pruning").
print('prune前的准确率')
n_val_samples = 10000
val_batch_size = FLAGS.val_batch_size
n_val_batch = int(n_val_samples / val_batch_size)
val_logits = np.zeros((n_val_samples, 10), dtype=np.float32)
val_labels = np.zeros((n_val_samples), dtype=np.int64)
val_losses = []
for i in range(n_val_batch):
    fetches = [logits, label_batch, loss]
    session_outputs = sess.run(fetches, {phase_train.name: False})
    val_logits[i*val_batch_size:(i+1)*val_batch_size, :] = session_outputs[0]
    val_labels[i*val_batch_size:(i+1)*val_batch_size] = session_outputs[1]
    # Inside the loop so every batch contributes to val_loss; it used to sit
    # after the loop and recorded only the last batch.
    val_losses.append(session_outputs[2])
pred_labels = np.argmax(val_logits, axis=1)
val_accuracy = np.count_nonzero(pred_labels == val_labels) / n_val_samples
val_loss = float(np.mean(np.asarray(val_losses)))
print('Test accuracy = %f' % val_accuracy)

prune前的准确率
Test accuracy = 0.881900


In [34]:
# Show the classifier bias after the 0.85 round.
# The label mentions conv_last, but the only member of the 'last_biases'
# collection is the fully-connected bias created in residual_net.
last_bias = tf.get_collection('last_biases')[0]
print('prune_rate =0.85时，可视化部分参数：res_net/conv_last/bias:0')
print(sess.run(last_bias))

prune_rate =0.85时，可视化部分参数：res_net/conv_last/bias:0
[ 0.15257302  0.          0.17163539  0.125       0.          0.
  0.          0.         -0.25        0.        ]


In [35]:
# Round 4: quantize 100% of every trainable variable. All mask entries become
# zero, so the masked gradients built below are zero as well.
trainable_vars = tf.trainable_variables()
para_dict = {v.name: v for v in trainable_vars}
one_dict = {v.name: tf.ones_like(v) for v in trainable_vars}
var_name = [v.name for v in trainable_vars]

prune_dict = apply_inq(para_dict, one_dict, var_name, 1)

trainer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = apply_prune_on_grads(trainer.compute_gradients(loss), prune_dict)
train_step = trainer.apply_gradients(grads_and_vars)

In [36]:
# Training loop after round 4 (every weight is quantized at this point).
# curr_lr is not reset here; it carries over from the previous training cell.
for step in range(FLAGS.max_steps):
  # Step-wise schedule: 1e-2 up to and including step 48000, then 1e-3.
  _lr = 1e-2 if step <= 48000 else 1e-3
  if curr_lr != _lr:
    curr_lr = _lr
    print('Learning rate set to %f' % curr_lr)

  # train
  log_now = step > 0 and step % FLAGS.summary_interval == 0
  fetches = [train_step, loss, accuracy] if log_now else [train_step, loss]
  sess_outputs = sess.run(fetches, {phase_train.name: True, learning_rate.name: curr_lr})

  if log_now:
    train_loss_value, train_acc_value = sess_outputs[1:]
    print('[%s] Iteration %d, train loss = %f, train accuracy = %f' %
        (datetime.now(), step, train_loss_value, train_acc_value))

  # validation
  if step > 0 and step % FLAGS.val_interval == 0:
    print('Evaluating...')
    n_val_samples = 10000
    val_batch_size = FLAGS.val_batch_size
    n_val_batch = int(n_val_samples / val_batch_size)
    val_logits = np.zeros((n_val_samples, 10), dtype=np.float32)
    val_labels = np.zeros((n_val_samples), dtype=np.int64)
    val_losses = []
    eval_fetches = [logits, label_batch, loss]
    for batch_idx in range(n_val_batch):
      lo = batch_idx * val_batch_size
      hi = lo + val_batch_size
      batch_logits, batch_labels, batch_loss = sess.run(
        eval_fetches, {phase_train.name: False})
      val_logits[lo:hi, :] = batch_logits
      val_labels[lo:hi] = batch_labels
      val_losses.append(batch_loss)
    pred_labels = np.argmax(val_logits, axis=1)
    val_accuracy = np.count_nonzero(pred_labels == val_labels) / n_val_samples
    val_loss = float(np.mean(np.asarray(val_losses)))
    print('Test accuracy = %f' % val_accuracy)

Learning rate set to 0.010000
[2018-07-17 22:02:37.475118] Iteration 100, train loss = 2.528941, train accuracy = 0.250000
[2018-07-17 22:02:43.942346] Iteration 200, train loss = 2.306299, train accuracy = 0.289062
[2018-07-17 22:02:50.426330] Iteration 300, train loss = 2.526438, train accuracy = 0.242188
[2018-07-17 22:02:56.905899] Iteration 400, train loss = 2.280803, train accuracy = 0.312500
[2018-07-17 22:03:03.393365] Iteration 500, train loss = 2.456332, train accuracy = 0.250000
[2018-07-17 22:03:09.885598] Iteration 600, train loss = 2.506089, train accuracy = 0.195312
[2018-07-17 22:03:16.434381] Iteration 700, train loss = 2.316119, train accuracy = 0.289062
[2018-07-17 22:03:23.011443] Iteration 800, train loss = 2.258167, train accuracy = 0.320312
[2018-07-17 22:03:29.708654] Iteration 900, train loss = 2.485927, train accuracy = 0.257812
[2018-07-17 22:03:36.444277] Iteration 1000, train loss = 2.514295, train accuracy = 0.203125
Evaluating...
Test accuracy = 0.275200


In [37]:
# Final validation accuracy of the fully quantized model
# (the printed label means "accuracy after pruning").
print('prune后的准确率')
n_val_samples = 10000
val_batch_size = FLAGS.val_batch_size
n_val_batch = int(n_val_samples / val_batch_size)
val_logits = np.zeros((n_val_samples, 10), dtype=np.float32)
val_labels = np.zeros((n_val_samples), dtype=np.int64)
val_losses = []
for i in range(n_val_batch):
    fetches = [logits, label_batch, loss]
    session_outputs = sess.run(fetches, {phase_train.name: False})
    val_logits[i*val_batch_size:(i+1)*val_batch_size, :] = session_outputs[0]
    val_labels[i*val_batch_size:(i+1)*val_batch_size] = session_outputs[1]
    # Inside the loop so every batch contributes to val_loss; it used to sit
    # after the loop and recorded only the last batch.
    val_losses.append(session_outputs[2])
pred_labels = np.argmax(val_logits, axis=1)
val_accuracy = np.count_nonzero(pred_labels == val_labels) / n_val_samples
val_loss = float(np.mean(np.asarray(val_losses)))
print('Test accuracy = %f' % val_accuracy)

prune后的准确率
Test accuracy = 0.275400


In [38]:
# Show the classifier bias after the final (1.00) round.
# The label mentions conv_last, but the only member of the 'last_biases'
# collection is the fully-connected bias created in residual_net.
last_bias = tf.get_collection('last_biases')[0]
print('prune_rate =1.00时，可视化部分参数：res_net/conv_last/bias:0')
print(sess.run(last_bias))

prune_rate =1.00时，可视化部分参数：res_net/conv_last/bias:0
[ 0.125  0.     0.125  0.125  0.     0.     0.     0.    -0.25   0.   ]
