## 实例：用经典卷积神经网络对cifar-10数据进行图像分类

In [None]:
##设定全局参数
import os
import re
import sys
import tarfile
import tensorflow as tf
import CIFAR10.CIFAR_input as input
FLAGS = tf.app.flags.FLAGS

# Model parameters, exposed as command-line flags.
tf.app.flags.DEFINE_integer(
    'batch_size', 64,
    """Number of images to process in a batch.""")
tf.app.flags.DEFINE_string(
    'data_dir', './CIFAR10',
    """Path to the CIFAR-10 data directory.""")
tf.app.flags.DEFINE_boolean(
    'use_fp16', False,
    """Train the model using fp16.""")

# Dataset constants re-exported from the input module.
IMAGE_SIZE = input.IMAGE_SIZE
NUM_CLASSES = input.NUM_CLASSES
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = input.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL

# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999  # Decay used for the variable moving averages.
NUM_EPOCH_PER_DECAY = 350.0  # Epochs after which the learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1  # Learning-rate decay factor.
INITIAL_LEARNING_RATE = 0.1  # Initial learning rate.

In [None]:
##构建模型
def inference(images):
    """Build the CIFAR-10 convolutional network and return its logits.

    Layer order: conv1 -> pool1 -> norm1 -> conv2 -> norm2 -> pool2 ->
    local3 -> local4 -> softmax_linear.

    :param images: 4-D image batch with 3 channels (conv1 uses a
        [5, 5, 3, 64] kernel); presumably the output of distorted_inputs()
        or inputs(). The height, width and channel dimensions must be
        statically known so the first dense layer can be sized.
    :return: unnormalized logits of shape [batch, NUM_CLASSES]; no softmax
        is applied here.
    :raises ValueError: if the per-example feature size of pool2 is not
        statically known.
    """
    # conv1: 5x5 convolution, 3 -> 64 channels, followed by ReLU.
    with tf.variable_scope('conv1') as scope:
        kernel = _variable_with_weight_decay(
            'weights', shape=[5, 5, 3, 64], stddev=5e-2, wd=0.0)
        conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(conv, biases)  # W*x + b
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv1)

    # pool1: 3x3 max pooling with stride 2.
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool1')

    # norm1: local response normalization.
    norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name='norm1')

    # conv2: 5x5 convolution, 64 -> 64 channels, followed by ReLU.
    with tf.variable_scope('conv2') as scope:
        kernel = _variable_with_weight_decay(
            'weights', shape=[5, 5, 64, 64], stddev=5e-2, wd=0.0)
        conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv2)

    # norm2: local response normalization (applied before pooling here).
    norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name='norm2')

    # pool2: 3x3 max pooling with stride 2.
    pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool2')

    # local3: first fully connected layer.
    with tf.variable_scope('local3') as scope:
        # Flatten every example into one row so a single matmul can be used.
        # The row width is taken from pool2's static shape and the batch
        # dimension is left as -1, so the layer does not depend on
        # FLAGS.batch_size matching the actual batch of `images`.
        feature_dims = pool2.get_shape().as_list()[1:]
        if any(size is None for size in feature_dims):
            raise ValueError(
                'inference() needs statically known image dimensions, '
                'got pool2 shape %s' % pool2.get_shape())
        dim = 1
        for size in feature_dims:
            dim *= size
        flattened = tf.reshape(pool2, [-1, dim])
        weights = _variable_with_weight_decay(
            'weights', shape=[dim, 384], stddev=0.04, wd=0.004)
        biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(flattened, weights) + biases,
                            name=scope.name)
        _activation_summary(local3)

    # local4: second fully connected layer.
    with tf.variable_scope('local4') as scope:
        weights = _variable_with_weight_decay(
            'weights', shape=[384, 192], stddev=0.04, wd=0.004)
        biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases,
                            name=scope.name)
        _activation_summary(local4)

    # softmax_linear: linear projection to class scores (logits only).
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_with_weight_decay(
            'weights', [192, NUM_CLASSES], stddev=1 / 192.0, wd=0.0)
        biases = _variable_on_cpu('biases', [NUM_CLASSES],
                                  tf.constant_initializer(0.0))
        softmax_linear = tf.add(tf.matmul(local4, weights), biases,
                                name=scope.name)
        _activation_summary(softmax_linear)

    return softmax_linear



In [None]:
def _variable_with_weight_decay(name, shape, stddev, wd):
    """Create a truncated-normal initialized variable with optional L2 decay.

    The variable itself is created through _variable_on_cpu(). When `wd` is
    not None, ``l2_loss(var) * wd`` is appended to the 'losses' collection.

    :param name: name of the variable
    :param shape: list of ints
    :param stddev: standard deviation of the truncated normal initializer
    :param wd: multiplier for the L2 loss of this variable; pass None to
        skip adding a weight-decay term
    :return: the created variable tensor
    """
    if FLAGS.use_fp16:
        dtype = tf.float16
    else:
        dtype = tf.float32

    initializer = tf.truncated_normal_initializer(stddev=stddev, dtype=dtype)
    var = _variable_on_cpu(name, shape, initializer)

    # No decay requested: nothing is added to the 'losses' collection.
    if wd is None:
        return var

    l2_term = tf.nn.l2_loss(var)
    weight_decay = tf.multiply(l2_term, wd, name='weight_loss')
    tf.add_to_collection('losses', weight_decay)
    return var

In [None]:
def _variable_on_cpu(name, shape, initializer):
    """Create (or fetch) a variable pinned to the '/cpu:0' device.

    The dtype is float16 when FLAGS.use_fp16 is set, float32 otherwise.

    :param name: name of the variable
    :param shape: list of ints
    :param initializer: initializer used for the variable's value
    :return: the variable tensor
    """
    if FLAGS.use_fp16:
        dtype = tf.float16
    else:
        dtype = tf.float32

    with tf.device('/cpu:0'):
        return tf.get_variable(name, shape, initializer=initializer,
                               dtype=dtype)

In [None]:
#计算loss
def loss(logits, labels):
    """Return the total loss: batch cross entropy plus collected loss terms.

    The mean cross entropy of the batch is added to the 'losses' collection,
    and everything in that collection (including any weight-decay terms
    registered by _variable_with_weight_decay) is summed.

    :param logits: logits from inference()
    :param labels: labels from distorted_inputs() or inputs(); 1-D tensor of
        shape [batch_size]
    :return: scalar loss tensor named 'total_loss'
    """
    # The sparse cross-entropy op is fed integer class ids.
    sparse_labels = tf.cast(labels, tf.int64)

    # Per-example cross entropy, then its mean over the batch.
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=sparse_labels,
        logits=logits,
        name="cross_entropy_per_exapmle")
    batch_mean = tf.reduce_mean(per_example, name='cross_entropy')
    tf.add_to_collection('losses', batch_mean)

    # Total loss = cross entropy + every other term stored in 'losses'.
    collected = tf.get_collection('losses')
    return tf.add_n(collected, name='total_loss')

In [None]:
# 更新参数/train_op
def train(total_loss, global_step):
    """Build the op that performs one training step of the CIFAR-10 model.

    Sets up a gradient-descent optimizer with a staircase exponentially
    decaying learning rate, and maintains moving averages of all trainable
    variables.

    :param total_loss: total loss from loss()
    :param global_step: integer variable counting the number of training
        steps processed
    :return: train_op, a no-op that depends on both the gradient update and
        the moving-average update
    """
    # Learning-rate schedule: decay once every NUM_EPOCH_PER_DECAY epochs.
    batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(batches_per_epoch * NUM_EPOCH_PER_DECAY)

    learning_rate = tf.train.exponential_decay(
        INITIAL_LEARNING_RATE,
        global_step,
        decay_steps,
        LEARNING_RATE_DECAY_FACTOR,
        staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    # Moving averages of the losses, plus their summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # Gradients are computed only after the loss averages were updated.
    with tf.control_dependencies([loss_averages_op]):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        grads_and_vars = optimizer.compute_gradients(total_loss)

    # Second half of `minimize()`: apply the gradients and bump global_step.
    apply_gradient_op = optimizer.apply_gradients(
        grads_and_vars, global_step=global_step)

    # Histogram summary for every gradient that exists.
    for gradient, variable in grads_and_vars:
        if gradient is None:
            continue
        tf.summary.histogram(variable.op.name + '/gradients', gradient)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Group both updates behind a single op named 'train'.
    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op

#计算loss的平均值
def _add_loss_summaries(total_loss):
    """Add scalar summaries for the losses of the CIFAR-10 model.

    Maintains an exponential moving average (decay 0.9) of every entry in
    the 'losses' collection and of the total loss, and attaches summaries
    for both the raw and the averaged values.

    :param total_loss: total loss from loss()
    :return: loss_averages_op, the op that updates the moving averages
    """
    # Exponential moving average: each update blends the new value into the
    # running average using the decay factor (0.9 here).
    averager = tf.train.ExponentialMovingAverage(0.9, name='avg')
    tracked = tf.get_collection('losses') + [total_loss]
    loss_averages_op = averager.apply(tracked)

    # Each loss gets two scalar summaries: the raw value, tagged with a
    # '(raw)' suffix, and the smoothed value under the original op name.
    for tensor in tracked:
        op_name = tensor.op.name
        tf.summary.scalar(op_name + '(raw)', tensor)
        tf.summary.scalar(op_name, averager.average(tensor))

    return loss_averages_op

In [None]:
#训练
#全局参数
from datetime import datetime
import time
import tensorflow as tf
from CIFAR10 import model_build
FLAGS = tf.app.flags.FLAGS

# Settings of the training run, exposed as command-line flags.
tf.app.flags.DEFINE_string(
    'train_dir', 'E:/Python/tensorflow/CIFAR10',
    """Directory
where to write event logs and checkpoint""")
tf.app.flags.DEFINE_integer(
    'max_steps', 100000,
    """Number of batches to run.""")
tf.app.flags.DEFINE_boolean(
    'log_device_placement', False,
    """Whether to log device placement.""")
tf.app.flags.DEFINE_integer(
    'log_frequency', 10,
    """How often to log results to the console.""")

In [None]:
#train函数
def train1():
    """Build the CIFAR-10 training graph and run it in a monitored session.

    The graph is assembled from model_build (inputs, inference, loss,
    train). Training runs until the StopAtStepHook fires at
    FLAGS.max_steps; FLAGS.train_dir is passed to the session as its
    checkpoint directory. Every FLAGS.log_frequency steps the loss and
    throughput are printed to the console.
    """
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()

        # Image and label tensors from the input pipeline.
        images, labels = model_build.distorted_inputs()

        # Forward pass: logits predicted by the model.
        logits = model_build.inference(images)

        # Loss tensor.
        loss = model_build.loss(logits, labels)

        # Op that trains on one batch and updates the parameters.
        train_op = model_build.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                # Starts at -1 so the first before_run() makes it step 0.
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                # Called before each run(); the returned SessionRunArgs adds
                # the loss tensor to the fetches of that run() call.
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                # Called after each run(); run_values.results holds the value
                # of the tensor requested in before_run().
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = (
                        FLAGS.log_frequency * FLAGS.batch_size / duration)
                    sec_per_batch = float(duration / FLAGS.log_frequency)
                    # The trailing space and closing parenthesis were missing
                    # in the original message ("0.123sec/batch").
                    format_str = ('%s:step %d,loss=%.2f (%.1f examples/sec; '
                                  '%.3f sec/batch)')
                    print(format_str % (datetime.now(), self._step,
                                        loss_value, examples_per_sec,
                                        sec_per_batch))

        # The hooks stop training at max_steps, watch the loss tensor for
        # NaN, and print progress to the console.
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                       tf.train.NanTensorHook(loss),
                       _LoggerHook()],
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement)
        ) as mon_sess:
            # Keep running train_op until a hook requests a stop.
            while not mon_sess.should_stop():
                mon_sess.run(train_op)

def main(argv=None):
    """Entry point handed to tf.app.run(); starts training via train1().

    :param argv: accepted for the tf.app.run() calling convention; unused.
    """
    train1()


if __name__ == '__main__':
    tf.app.run(main=main)