# 显式地定义变量进行batch_normalization
问题1:为什么TF的BN操作，和论文中不同？TF中的BN操作，在参数默认情况下，特征经过处理得到的是自身。
答：直接run(y)得到的结果中间缺了操作（甚至tf.layers.batch_normalization接口也有同样“毛病”），完整的跑模型就没出现这个问题。问题出在不训练而直接推测，看函数内部，mean和variance是从变量拿得数值，变量这时候是初始值，mean=0,variance=1,(x-0)/1当然是x自己。

问题2:为什么只做normalize就能预防梯度消失增加训练效率，还要有一个gamma和beta来转换？
答：这个确实是一个矛盾，算是一个trade-off。

问题3:多channel怎么处理？如果channel是最后一个axis，实际代码并不满足这个情况
答：其实mean就是整个channel的mean，而不是特征图“像素点”，见例2。但是，这个算法不一定是权威或者普适，保留意见。如果需要，也可以办到。


扩展：python手动的放别的文件了。


In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.python.training import moving_averages

# 作为参考，测试不需要传参的黑盒接口的输出 
help(tf.layers.batch_normalization)
这个也犯病，这个输出不符合预期，原因，没训练过

In [7]:
x0 = tf.constant([[1.,3.,3.],[5.,4.,5.]])
y0 = tf.layers.batch_normalization(x0)
print(x0)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    print(sess.run(x0))
    print(sess.run(y0))
#help(tf.layers.batch_normalization)

Tensor("Const_2:0", shape=(2, 3), dtype=float32)
[[ 1.  3.  3.]
 [ 5.  4.  5.]]
[[ 0.99950039  2.9985013   2.9985013 ]
 [ 4.99750185  3.99800158  4.99750185]]


# 带自定义变量的batch_norm的基本使用

In [3]:
def create_var(name, shape, initializer, trainable = True):
    return tf.get_variable(name, shape = shape, dtype = tf.float32,
                           initializer = initializer, trainable = trainable)
# batch norm layer
def batch_norm(x, decay=0.999, epsilon=1e-03, is_training=True,
               scope="scope"):
    x_shape = x.get_shape()
    num_inputs = x_shape[-1]
    reduce_dims = list(range(len(x_shape) - 1))
    with tf.variable_scope(scope):
        beta = create_var("beta", [num_inputs,],
                               initializer=tf.zeros_initializer())
        gamma = create_var("gamma", [num_inputs,],
                                initializer=tf.ones_initializer())
        # for inference
        moving_mean = create_var("moving_mean", [num_inputs,],
                                 initializer=tf.zeros_initializer(),
                                 trainable=False)
        moving_variance = create_var("moving_variance", [num_inputs],
                                     initializer=tf.ones_initializer(),
                                     trainable=False)
    if is_training:
        mean, variance = tf.nn.moments(x, axes=reduce_dims)
        update_move_mean = moving_averages.assign_moving_average(moving_mean,
                                                mean, decay=decay)
        update_move_variance = moving_averages.assign_moving_average(moving_variance,
                                                variance, decay=decay)
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_move_mean)
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_move_variance)
    else:
        mean, variance = moving_mean, moving_variance
    return tf.nn.batch_normalization(x, mean, variance, beta, gamma, epsilon)

In [8]:
#不训练的情况下，inference阶段，其实mean, variance不是x自身计算得到，是初始值，
#所以此例输出结果数值并不正确，主要是看一下epsilon

#不训练的情况下，直接把x做BN得到y的结果，直接inference是使用了初始变量值
#变量默认横移beta=0，缩放gamma=1
#最后0比0,分母多个epsilon，会比1:1小一些，默认epsilon会得到一个接近原来x并且小一点的值
#如果让epsilon过小，比如1e-09，可以看到y的输出仍然是[1.,2.,3.]

#可以调整epsilon观察变化。
x1 = tf.constant([1.,2.,3.])
y1 = batch_norm(x1, is_training=False,scope="bn_1",epsilon=1e-09)#decay=0.999, epsilon=1e-03, 都不管
print(x1)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    print(sess.run(x1))
    print(sess.run(y1))
    

Tensor("Const_3:0", shape=(3,), dtype=float32)
[ 1.  2.  3.]
[ 1.  2.  3.]


# 封装成类，把里边的参数做一些接口，方便查询
通过多次修改类，最终发现还是placeholder好用，
类对象一次定义，不用其他接口进行变更，内部参数可以去掉很多self，换成placeholder。保留查询接口即可。

In [2]:
def create_var(name, shape, initializer, trainable = True):
    return tf.get_variable(name, shape = shape, dtype = tf.float32,
                           initializer = initializer, trainable = trainable)

class batch_normalization:
    def __init__(self, x, decay=0.999, epsilon=1e-03, is_training=True,
                   scope="scope"):
        input_shape = x.get_shape()
        num_inputs = input_shape[-1]#以2*3为例，num_inputs算是特征数量，但是有了channel的话
        reduce_dims = list(range(len(input_shape) - 1))#一维数据的话，这就是空集了，不行
#         self.epsilon = epsilon
#         self.decay = decay
        
        with tf.variable_scope(scope):
            self.beta = create_var("beta", [num_inputs,],
                                   initializer=tf.zeros_initializer())
            self.gamma = create_var("gamma", [num_inputs,],
                                    initializer=tf.ones_initializer())
            # for inference
            self.moving_mean = create_var("moving_mean", [num_inputs,],
                                     initializer=tf.zeros_initializer(),
                                     trainable=False)
            self.moving_variance = create_var("moving_variance", [num_inputs],
                                         initializer=tf.ones_initializer(),
                                         trainable=False)
        if is_training is not None:#训练阶段update变量
            self.mean, self.variance = tf.nn.moments(x, axes=reduce_dims)#默认tf.nn.moments=False
            update_move_mean = moving_averages.assign_moving_average(self.moving_mean,
                                                    self.mean, decay=decay)
            update_move_variance = moving_averages.assign_moving_average(self.moving_variance,
                                                    self.variance, decay=decay)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_move_mean)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_move_variance)
        else:#推测阶段直接使用变量
            self.mean, self.variance = self.moving_mean, self.moving_variance
        #init不能return，可以定义一个out，直接引用
        self.output = tf.nn.batch_normalization(x, self.mean, self.variance, self.beta, self.gamma, epsilon)
        

## 例0:用多个样本随便跑一下BN的输出
一个数据的话，每个特征就是自身，经过normalize还是得到自身。
如果是多个数据，bn的axis是0,也就是最外部的中括号，是多个数据的同一个特征平均来算
不过转换结果都不太符合预期！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！！

In [7]:
#错误例子，删了。。。
#举例，第一个特征，1和4平均是2.5,方差是2.25，normalize之后scale和shift之前应该分别是-1和+1,gamma和beta分别是1,0
#所以理论上，normalize之后第一个特征，两个数据x0和x1应该得到一样的值？公式是不是漏了什么说明？
#1和4经过normalize变成了相同的两个值？也许两个数据也不方便观察，毕竟平均值永远都是中间值
#实测，无论几个样本，最终y都几乎等于x。。。

In [5]:
#对照组：
#这是未经初始化直接预测的例子，无论怎么换，转换后的值都是比原值稍微小一点。不会变成期望的那个分布。（片头交代了=）
x2 = tf.constant([[1.,2.,3.],[4.,5.,6.],[2.,3.,4.]])
bn = batch_normalization(x2, is_training=False,scope="BN_training_False")
y2 = bn.output
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    print('x2:',sess.run(x2))
    print('y2:',sess.run(y2))
    print('moving_mean:',sess.run(bn.moving_mean))
    print('moving_variance:',sess.run(bn.moving_variance))
    print('gamma:',sess.run(bn.gamma))
    print('beta:',sess.run(bn.beta))

x2: [[ 1.  2.  3.]
 [ 4.  5.  6.]
 [ 2.  3.  4.]]
y2: [[ 0.99950039  1.99900079  2.9985013 ]
 [ 3.99800158  4.99750185  5.9970026 ]
 [ 1.99900079  2.9985013   3.99800158]]
moving_mean: [ 0.  0.  0.]
moving_variance: [ 1.  1.  1.]
gamma: [ 1.  1.  1.]
beta: [ 0.  0.  0.]


In [6]:
#直接设置训练阶段，输出正常
#没有执行update，所以变量没变
#因为什么？输出都是0呢？gamma和beta和moving_mean和moving_variance都一样。
#可能之前一直串变量，到本例就符合预期了，y3是-1和1!！！！！！！！！！！！！！！
#本例和前一例区别是is_training？所以，核心区别是is_training内的流程！
#不训练而直接推测，mean和variance是拿得变量，变量这时候是初始值，mean=0,variance=1,(x-0)/1当然是自己
x3 = tf.constant([[1.,2.,3.],[4.,5.,6.]])
bn3 = batch_normalization(x3, is_training=True,scope="BN_training_True")#decay=0.999, epsilon=1e-03, 都不管
y3 = bn3.output
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    print(sess.run(x3))
    print('y3:',sess.run(y3))
    print('gamma:',sess.run(bn3.gamma))
    print(sess.run(bn3.beta))
    print(sess.run(bn3.moving_mean))
    print(sess.run(bn3.moving_variance))

[[ 1.  2.  3.]
 [ 4.  5.  6.]]
y3: [[-0.99977791 -0.99977779 -0.99977791]
 [ 0.99977779  0.99977803  0.99977779]]
gamma: [ 1.  1.  1.]
[ 0.  0.  0.]
[ 0.  0.  0.]
[ 1.  1.  1.]


## 例1：试一个基本完整流程
不是用WX+B，直接就通过BN去拟合input_和label_
不过这个例子不算一个实际的BN操作，这里beta和gamma过大了，实际的BN缩放和偏移是很微小的。

placeholder这里会有些冲突，每次重启内核！！！！

经过了训练和测试两个阶段。
训练使用x1_和label1_
测试使用x2_
结果x2_和x1_输出一样
结果“不符合预期”，但也符合预期，因为这是BN，不是真的prediction


In [7]:
x1_ = np.array([[1.,2.,3.],[11.,12.,13.]], dtype=np.float)#
print(x1_.dtype)
print(x1_.shape)
label1_ = tf.constant([[2.,4.,6.],[22.,24.,26.]],dtype=tf.float32)

x2_ = np.array([[3.,3.,3.],[10.,14.,20.]],dtype=np.float)

input_ = tf.placeholder(shape=[2,3],dtype = tf.float32)
is_training = tf.placeholder(dtype=tf.bool)


print(type(input_))
bn_ = batch_normalization(input_, is_training = is_training,scope='experiment_2')
pred = bn_.output
loss = tf.reduce_sum(tf.square(pred - label1_))

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)

float64
(2, 3)
<class 'tensorflow.python.framework.ops.Tensor'>


In [8]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    #print(sess.run(input_))
    for i in range(500):
        if i % 1 == 0:#一定要把打印放在训练前，才能看到变量的初始值
            print('i:',i)
            print('pred is ',sess.run(pred, feed_dict={input_:x1_, is_training:True}))
            print('beta:',sess.run(bn_.beta, feed_dict={input_:x1_, is_training:True}))
            print('gamma:',sess.run(bn_.gamma, feed_dict={input_:x1_, is_training:True}))
        sess.run(train_op, feed_dict={input_:x1_, is_training:True})

            
    #inference：结果没有错，但是不是常规模型期待的预测结果
    #因为无论什么数据，两个样本的话，归一化以后都是-1和1
    #-1和1,经过这个固定的gamma和beta，会变成相同的值，这个本质上不是预测功能
    #但是这却是是BN追求的最终效果，归一化缩放所有输入（表达能力降低）然后又按一个固定的scale和shift处理一下。
    #bn_.change_stage(is_training = False)
    print('test : pred is ',sess.run(pred, feed_dict={input_:x2_, is_training:False}))
    print('test : beta:',sess.run(bn_.beta, feed_dict={input_:x2_, is_training:False}))
    print('test : gamma:',sess.run(bn_.gamma, feed_dict={input_:x2_, is_training:False}))    
 



###

#观察过程，beta初始接近0,gamma初始接近1
#注意重启内核，有些冲突没处理好。调整学习绿，训练500次：
# pred is  [[  1.99999964   3.99999928   6.        ]
#  [ 21.99997711  23.99997711  25.99997711]]
# beta: [ 11.99998856  13.9999876   15.99998856]
# gamma: [ 10.00018883  10.00018883  10.00018883]

#具体beta和gamma的值怎么算，我对TF的BN接口暂时还有疑问。
#但是依据前边的测试，一个常规的normalize之后输出等于输入，所以本例beta和gamma就需要把数据放大2倍。
#gamma是10,10倍，beta是12,14,16，
#那也就是说，在gamma和beta处理之前，[11,12,13]应该是变成了[1,1,1]，才能输出[22,24,26]
#但是[2,4,6]的结果得怎么算？[1,2,3]缩放到更小了？
#但是这些和前边的测试都是矛盾的！！！前边的例子，在gamma和beta保持初始值的情况下，in=out

#具体初始化的时候能输出什么，可以再看一下第一次的输出
# i: 0
#input   [[1.,2.,3.],[11.,12.,13.]]
# pred is  [[-0.87996638 -0.79996634 -0.71996629]
#  [ 1.83996654  1.91996646  1.99996626]]
# beta: [ 0.47999999  0.56        0.63999999]
# gamma: [ 1.35999358  1.35999358  1.35999346]

# 首先，这个beta和gamma居然第一次也不是0和1,是散的
# 如果按这个beta的gamma的话，1和11在normalize之后分别是-1和+1,按这个参数，输出确实应该是-0.88和1.36。
# 往后看，前五步一直都满足-1和+1分别乘以gamma，然后加beta


#观察初始状态，符合-1和1的第一步转换预期，但是输出值却不是x1自身，而是严格按照公式输出！
#所以问题可能出在之前的简易流程没走通！！！！！
# i: 0
# pred is  [[-0.99997997 -0.99997997 -0.99997997]
#  [ 0.99997997  0.99997997  0.99997997]]
# beta: [ 0.  0.  0.]
# gamma: [ 1.  1.  1.]

i: 0
pred is  [[-0.99997997 -0.99997997 -0.99997997]
 [ 0.99997997  0.99997997  0.99997997]]
beta: [ 0.  0.  0.]
gamma: [ 1.  1.  1.]
i: 1
pred is  [[-0.87996638 -0.79996634 -0.71996629]
 [ 1.83996654  1.91996646  1.99996626]]
beta: [ 0.47999999  0.56        0.63999999]
gamma: [ 1.35999358  1.35999358  1.35999346]
i: 2
pred is  [[-0.764754   -0.60795408 -0.45115376]
 [ 2.64635372  2.80315375  2.95995402]]
beta: [ 0.94080001  1.09759998  1.25440001]
gamma: [ 1.70558798  1.7055881   1.70558798]
i: 3
pred is  [[-0.65415061 -0.42362243 -0.19309449]
 [ 3.42048621  3.6510148   3.88154244]]
beta: [ 1.38316798  1.61369598  1.84422398]
gamma: [ 2.03735924  2.03735924  2.03735924]
i: 4
pred is  [[-0.5479719  -0.24666512  0.05464196]
 [ 4.16365433  4.46496105  4.76626778]]
beta: [ 1.8078413   2.10914803  2.41045499]
gamma: [ 2.35586023  2.35586023  2.35586023]
i: 5
pred is  [[-0.44604081 -0.07678616  0.29246855]
 [ 4.8770957   5.24635029  5.61560488]]
beta: [ 2.21552753  2.58478212  2.95403671]
g

gamma: [ 8.83110428  8.83110332  8.83110428]
i: 51
pred is  [[  1.62602723   3.37664866   5.12726593]
 [ 19.38140488  21.13202286  22.88264465]]
beta: [ 10.50371552  12.2543354   14.00495529]
gamma: [ 8.87786579  8.87786484  8.87786674]
i: 52
pred is  [[  1.64098871   3.40158391   5.16217756]
 [ 19.48614502  21.24673843  23.00733566]]
beta: [ 10.56356716  12.32416153  14.08475685]
gamma: [ 8.92275715  8.9227562   8.9227581 ]
i: 53
pred is  [[  1.65535069   3.42552328   5.19569254]
 [ 19.58669662  21.35686684  23.12704086]]
beta: [ 10.62102413  12.3911953   14.16136646]
gamma: [ 8.96585274  8.96585178  8.96585369]
i: 54
pred is  [[  1.66913807   3.44850349   5.22786617]
 [ 19.68322754  21.46259117  23.24195671]]
beta: [ 10.67618275  12.45554733  14.23491192]
gamma: [ 9.00722504  9.00722408  9.00722599]
i: 55
pred is  [[  1.68237436   3.47056437   5.25875282]
 [ 19.77589798  21.56408691  23.35227966]]
beta: [ 10.72913551  12.5173254   14.30551529]
gamma: [ 9.04694271  9.04694176  9.04694

pred is  [[  1.95524478   3.92539334   5.89554405]
 [ 21.68653488  23.65668297  25.62683487]]
beta: [ 11.82088947  13.79103756  15.76118851]
gamma: [ 9.86584282  9.86584187  9.86584187]
i: 104
pred is  [[  1.95703483   3.92837715   5.89972258]
 [ 21.69907379  23.67041397  25.64175797]]
beta: [ 11.82805347  13.79939556  15.77074051]
gamma: [ 9.87121677  9.87121582  9.87121582]
i: 105
pred is  [[  1.95875335   3.93124199   5.90373373]
 [ 21.71110916  23.68359756  25.65608788]]
beta: [ 11.83493137  13.80741978  15.77991104]
gamma: [ 9.87637615  9.8763752   9.8763752 ]
i: 106
pred is  [[  1.96040273   3.93399262   5.90758467]
 [ 21.72266388  23.69625282  25.66984367]]
beta: [ 11.84153366  13.8151226   15.78871441]
gamma: [ 9.88132858  9.88132763  9.88132763]
i: 107
pred is  [[  1.96198702   3.93663263   5.91128159]
 [ 21.73375893  23.70840263  25.68305206]]
beta: [ 11.84787273  13.8225174   15.79716587]
gamma: [ 9.8860836   9.88608265  9.88608265]
i: 108
pred is  [[  1.96350825   3.9391679

pred is  [[  1.99721336   3.99535084   5.9934926 ]
 [ 21.98047638  23.97861481  25.97675514]]
beta: [ 11.98884487  13.98698235  15.98512363]
gamma: [ 9.99183178  9.99183178  9.99183083]
i: 172
pred is  [[  1.99732423   3.99553633   5.99375343]
 [ 21.98125839  23.9794693   25.97768402]]
beta: [ 11.98929119  13.98750305  15.98571873]
gamma: [ 9.99216652  9.99216652  9.99216557]
i: 173
pred is  [[  1.9974314    3.99571538   5.9940033 ]
 [ 21.98200607  23.98029137  25.97857666]]
beta: [ 11.98971939  13.98800278  15.98628998]
gamma: [ 9.99248791  9.99248791  9.99248695]
i: 174
pred is  [[  1.99753404   3.99588633   5.99424267]
 [ 21.98272705  23.9810791   25.97943497]]
beta: [ 11.99013042  13.98848248  15.98683834]
gamma: [ 9.99279594  9.99279594  9.99279594]
i: 175
pred is  [[  1.99763334   3.9960506    5.9944725 ]
 [ 21.98341751  23.98183441  25.98025703]]
beta: [ 11.99052525  13.9889431   15.98736477]
gamma: [ 9.99309254  9.99309254  9.99309254]
i: 176
pred is  [[  1.99772739   3.9962091

beta: [ 11.99903965  13.99887562  15.99871445]
gamma: [ 9.99947834  9.99947834  9.99947834]
i: 232
pred is  [[  1.99977076   3.999614     5.99945927]
 [ 21.99838257  23.99822807  25.99807358]]
beta: [ 11.9990778   13.99892044  15.99876595]
gamma: [ 9.99950695  9.99950695  9.99950695]
i: 233
pred is  [[  1.99978101   3.99962878   5.9994812 ]
 [ 21.99844933  23.99829674  25.99814987]]
beta: [ 11.99911499  13.99896336  15.99881554]
gamma: [ 9.99953461  9.99953461  9.99953461]
i: 234
pred is  [[  1.99978924   3.99964428   5.99950218]
 [ 21.99851227  23.99836731  25.99822426]]
beta: [ 11.99915028  13.99900532  15.99886322]
gamma: [ 9.99956131  9.99956131  9.99956131]
i: 235
pred is  [[  1.99979818   3.99965835   5.99952221]
 [ 21.9985733   23.99843216  25.99829483]]
beta: [ 11.99918461  13.99904537  15.998909  ]
gamma: [ 9.99958706  9.99958706  9.99958706]
i: 236
pred is  [[  1.99980509   3.99967217   5.99954128]
 [ 21.99862671  23.99849701  25.99836349]]
beta: [ 11.99921703  13.99908352  1

pred is  [[  1.99997461   3.99995565   5.99993801]
 [ 21.99981689  23.99979782  25.99977875]]
beta: [ 11.9998951   13.99987602  15.9998579 ]
gamma: [ 10.00012112  10.00012016  10.00012016]
i: 286
pred is  [[  1.9999752    3.99995852   5.99994087]
 [ 21.99982262  23.99980164  25.99978638]]
beta: [ 11.99989891  13.99988079  15.99986362]
gamma: [ 10.00012398  10.00012302  10.00012302]
i: 287
pred is  [[  1.9999758    3.99995971   5.99994373]
 [ 21.99982834  23.99980927  25.99979401]]
beta: [ 11.99990273  13.99988556  15.99986935]
gamma: [ 10.00012684  10.00012589  10.00012589]
i: 288
pred is  [[  1.9999764    3.99996185   5.99994564]
 [ 21.99983597  23.9998188   25.99980164]]
beta: [ 11.99990654  13.99989033  15.99987411]
gamma: [ 10.0001297   10.00012875  10.00012875]
i: 289
pred is  [[  1.99997878   3.999964     5.99994755]
 [ 21.9998436   23.99982834  25.99981117]]
beta: [ 11.99991035  13.9998951   15.99987888]
gamma: [ 10.00013256  10.00013161  10.00013161]
i: 290
pred is  [[  1.99997

pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 344
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 345
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 346
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 347
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 348
pred is  [[  1.99999

i: 395
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 396
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 397
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 398
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 399
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 400
pred is  [[  

gamma: [ 10.00018883  10.00018883  10.00018883]
i: 452
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 453
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 454
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 455
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883  10.00018883  10.00018883]
i: 456
pred is  [[  1.99999964   3.99999928   6.        ]
 [ 21.99997711  23.99997711  25.99997711]]
beta: [ 11.99998856  13.9999876   15.99998856]
gamma: [ 10.00018883

## 例2:多channel的例子
### 形状验证
关于shape计算和轴向的验证：多channel怎么处理？其实mean就是整个channel的mean，而不是特征图“像素点”的mean。但是，也不一定绝对，只是这个模板确实是把channel之上全部合并了，所以可能实际实现会有不同。

In [3]:
x = tf.constant([[1.,2.,3.],[4.,5.,6.]])
#带channel的情况，其实并不符合预期：我认为最内部的是channel，其实不是，对BN来说，内部永远是num_inputs
#简单说，BN的gamma和beta的参数量，只为满足最内部维度的“特征数量”，他永远认为最后一个维度是特征，而不是channel
#所以，如果有channel的情况，可能需要特殊处理。
#理论上讲，我这个类内部还要做channel的循环？还是向量操作隐含了？

#手懒，这个模拟RGB通道，少一个特征。
#三维
x = tf.constant([[[1.,1.1],[2.,2.2],[3.,3.3]],
                 [[4.,4.4],[5.,5.5],[6.,6.6]]])
#四维:(2, 2, 3, 3)，两个样本，每个样本有2*3,每个内部是三个channel
#reduce_dims: [0, 1, 2]，给moment用，其实是合并了外部，保留了channel，gamma、beta确实是与channel一致
#严格说，并不是特征之间的mean和variance，而是整个通道的mean和variance。。。。。
#mean: [  9.  10.  11.]
#variance: [ 31.  31.  31.]
#以第一个channel为例计算：
#1+4+7+1+4+7+11+14+17+11+14+17==108,
#108/12=9
x = tf.constant([
    [[[1.,2.,3.],[4.,5.,6.],[7.,8.,9.]],[[1.,2.,3.],[4.,5.,6.],[7.,8.,9.]]],
    [[[11.,12.,13.],[14.,15.,16.],[17.,18.,19.]],[[11.,12.,13.],[14.,15.,16.],[17.,18.,19.]]],
])
print(x.get_shape())
print(x.get_shape()[-1])
num_inputs = x.get_shape()[-1]
reduce_dims = list(range(len(x.get_shape()) - 1))
print('reduce_dims:',reduce_dims)#reduce_dims得到的是[0]
print('num_inputs:',num_inputs)
# reduce_sum1 = tf.reduce_sum(x)
# reduce_sum2 = tf.reduce_sum(x,axis=0)#0是同特征不同样本间的操作。
# reduce_sum3 = tf.reduce_sum(x,axis=1)

mean, variance = tf.nn.moments(x, axes=reduce_dims)
with tf.Session() as sess:
#     print(sess.run(reduce_sum1))
#     print(sess.run(reduce_sum2))
#     print(sess.run(reduce_sum3))
    print('mean:',sess.run(mean))
    print('variance:',sess.run(variance))
    #print(sess.run(tf.reduce_sum(x,axis=1)))

(2, 2, 3, 3)
3
reduce_dims: [0, 1, 2]
num_inputs: 3
mean: [  9.  10.  11.]
variance: [ 31.  31.  31.]


### 例2具体实现

In [5]:
x1_ = np.array([
    [[[1.,2.,3.],[4.,5.,6.],[7.,8.,9.]],[[1.,2.,3.],[4.,5.,6.],[7.,8.,9.]]],
    [[[11.,12.,13.],[14.,15.,16.],[17.,18.,19.]],[[11.,12.,13.],[14.,15.,16.],[17.,18.,19.]]],
], dtype=np.float)

print(x1_.dtype)
print(x1_.shape)
#标签同样用2倍关系
label1_ = np.multiply(x1_,2)

#x2是x1的10倍，方便观察最终预测
x2_ = np.multiply(x1_,10)

input_ = tf.placeholder(shape=[2,2,3,3],dtype = tf.float32)
is_training = tf.placeholder(dtype=tf.bool)

print(type(input_))
bn_ = batch_normalization(input_, is_training = is_training,scope='experiment_213')
pred = bn_.output
loss = tf.reduce_sum(tf.square(pred - label1_))

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)

float64
(2, 2, 3, 3)
<class 'tensorflow.python.framework.ops.Tensor'>


In [6]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    #print(sess.run(input_))
    for i in range(500):
        if i % 1 == 0:#一定要把打印放在训练前，才能看到变量的初始值
            print('i:',i)
            print('pred is ',sess.run(pred, feed_dict={input_:x1_, is_training:True}))
            print('beta:',sess.run(bn_.beta, feed_dict={input_:x1_, is_training:True}))
            print('gamma:',sess.run(bn_.gamma, feed_dict={input_:x1_, is_training:True}))
        sess.run(train_op, feed_dict={input_:x1_, is_training:True})

            
    #inference：结果没有错，但是不是常规模型期待的预测结果
    #因为无论什么数据，两个样本的话，归一化以后都是-1和1
    #-1和1,经过这个固定的gamma和beta，会变成相同的值，这个本质上不是预测功能
    #但是这却是是BN追求的最终效果，归一化缩放所有输入（表达能力降低）然后又按一个固定的scale和shift处理一下。
    #bn_.change_stage(is_training = False)
    print('test : pred is ',sess.run(pred, feed_dict={input_:x2_, is_training:False}))
    print('test : beta:',sess.run(bn_.beta, feed_dict={input_:x2_, is_training:False}))
    print('test : gamma:',sess.run(bn_.gamma, feed_dict={input_:x2_, is_training:False}))    
 


'''
#500次后的预测：符合预期
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
#测试集：“不符合”预期，其实也符合，因为不是真的prediction，例1说了。
test : pred is  [[[[  1.99974537   3.99974442   5.99974537]
   [  7.99983978   9.99983883  11.99983978]
   [ 13.9999342   15.99993324  17.99993324]]

  [[  1.99974537   3.99974442   5.99974537]
   [  7.99983978   9.99983883  11.99983978]
   [ 13.9999342   15.99993324  17.99993324]]]


 [[[ 22.00006104  24.00005913  26.00005913]
   [ 28.0001545   30.0001545   32.00015259]
   [ 34.00024796  36.00024414  38.00025177]]

  [[ 22.00006104  24.00005913  26.00005913]
   [ 28.0001545   30.0001545   32.00015259]
   [ 34.00024796  36.00024414  38.00025177]]]]
test : beta: [ 17.99999619  19.99999619  21.99999809]
test : gamma: [ 11.1357069  11.1357069  11.1357069]
'''

i: 0
pred is  [[[[-1.43681931 -1.43681931 -1.43681931]
   [-0.89801204 -0.89801204 -0.89801204]
   [-0.35920477 -0.35920477 -0.35920489]]

  [[-1.43681931 -1.43681931 -1.43681931]
   [-0.89801204 -0.89801204 -0.89801204]
   [-0.35920477 -0.35920477 -0.35920489]]]


 [[[ 0.35920489  0.35920501  0.35920489]
   [ 0.89801216  0.89801216  0.89801204]
   [ 1.43681931  1.43681931  1.4368192 ]]

  [[ 0.35920489  0.35920501  0.35920489]
   [ 0.89801216  0.89801216  0.89801204]
   [ 1.43681931  1.43681931  1.4368192 ]]]]
beta: [ 0.  0.  0.]
gamma: [ 1.  1.  1.]
i: 1
pred is  [[[[ -0.61186981  -0.13186979   0.34812915]
   [  1.23758125   1.71758127   2.19758058]
   [  3.08703232   3.56703234   4.04703188]]

  [[ -0.61186981  -0.13186979   0.34812915]
   [  1.23758125   1.71758127   2.19758058]
   [  3.08703232   3.56703234   4.04703188]]]


 [[[  5.55296707   6.03296709   6.51296759]
   [  7.40241814   7.88241816   8.36241913]
   [  9.2518692    9.73186874  10.21187019]]

  [[  5.55296707   6.032

pred is  [[[[  1.99977052   3.99963427   5.99949837]
   [  7.99940252   9.99926567  11.99913025]
   [ 13.99903488  15.99889755  17.99876213]]

  [[  1.99977052   3.99963427   5.99949837]
   [  7.99940252   9.99926567  11.99913025]
   [ 13.99903488  15.99889755  17.99876213]]]


 [[[ 21.99854469  23.99840736  25.99827194]
   [ 27.99817657  29.99803734  31.99790382]
   [ 33.99780655  35.99767303  37.99753571]]

  [[ 21.99854469  23.99840736  25.99827194]
   [ 27.99817657  29.99803734  31.99790382]
   [ 33.99780655  35.99767303  37.99753571]]]]
beta: [ 17.99878883  19.9986515   21.99851799]
gamma: [ 11.13502502  11.13502407  11.13502502]
i: 36
pred is  [[[[  1.99982297   3.99972177   5.99961948]
   [  7.99954367   9.9994421   11.99934006]
   [ 13.99926472  15.99916172  17.99906158]]

  [[  1.99982297   3.99972177   5.99961948]
   [  7.99954367   9.9994421   11.99934006]
   [ 13.99926472  15.99916172  17.99906158]]]


 [[[ 21.99889183  23.99878883  25.99868774]
   [ 27.99861336  29.9985084

i: 51
pred is  [[[[  1.99999654   3.99999547   5.99999332]
   [  7.99999189   9.99999142  11.99998856]
   [ 13.9999876   15.9999876   17.99998474]]

  [[  1.99999654   3.99999547   5.99999332]
   [  7.99999189   9.99999142  11.99998856]
   [ 13.9999876   15.9999876   17.99998474]]]


 [[[ 21.99998093  23.99998283  25.99997711]
   [ 27.99997711  29.99997902  31.9999733 ]
   [ 33.99996948  35.99997711  37.99996948]]

  [[ 21.99998093  23.99998283  25.99997711]
   [ 27.99997711  29.99997902  31.9999733 ]
   [ 33.99996948  35.99997711  37.99996948]]]]
beta: [ 17.99998474  19.99998474  21.99998093]
gamma: [ 11.13569927  11.13570023  11.13569927]
i: 52
pred is  [[[[  1.99999881   3.99999619   5.99999428]
   [  7.99999523   9.99999332  11.99999046]
   [ 13.99999142  15.99999046  17.99998665]]

  [[  1.99999881   3.99999619   5.99999428]
   [  7.99999523   9.99999332  11.99999046]
   [ 13.99999142  15.99999046  17.99998665]]]


 [[[ 21.99998665  23.99998665  25.99998283]
   [ 27.99998283  29.9

gamma: [ 11.1357069  11.1357069  11.1357069]
i: 82
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 83
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.999

i: 107
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 108
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99

i: 124
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 125
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99

i: 146
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 147
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99

pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 164
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619 

beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 185
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 186
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99

i: 210
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 211
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99

gamma: [ 11.1357069  11.1357069  11.1357069]
i: 242
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 243
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.9

gamma: [ 11.1357069  11.1357069  11.1357069]
i: 273
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 274
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.9

pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 297
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619 

   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 315
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 316
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9

beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 335
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 336
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99

pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 356
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619 

i: 373
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 374
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99

beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 393
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 394
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99

i: 420
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 421
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99

gamma: [ 11.1357069  11.1357069  11.1357069]
i: 444
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 445
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.9

gamma: [ 11.1357069  11.1357069  11.1357069]
i: 470
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 471
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.9

beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
i: 499
pred is  [[[[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]

  [[  1.99999797   3.99999785   5.99999952]
   [  7.99999762   9.99999714  11.99999905]
   [ 13.99999714  15.99999714  17.99999809]]]


 [[[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]

  [[ 21.99999619  23.99999619  25.99999809]
   [ 27.99999619  29.99999619  31.99999809]
   [ 33.99999237  35.99999237  37.99999619]]]]
beta: [ 17.99999619  19.99999619  21.99999809]
gamma: [ 11.1357069  11.1357069  11.1357069]
test : pred is  [[[[  1.99974537   3.99974442   5.99974537]
   [  7.99983978   9.99983883  11.99983978]
   [ 13.9999342   15.99993324  17.99993324]]

  [[  1.99974537   3.99974442   5.99974537]
   [  7.99983978   9.99983883  11.99983978]
   [ 13.9999342   15.99

## 为什么引入BN
机器学习就是基于IID的，你现在SGD的分布乱变，不符合这种假设，网络模型学不到规律。BN去除了这个烦恼，全是同分布（虽然他也用gamma和beta搞偏移了。但是这组变量是共用的，所以分布应该还是相同的）
总之，把激活的输入分布固定住，避免Internal  Covariate Shift。

另外一方面就是导致梯度消失和收敛速度之类的，就不赘述了。


## 为什么如果gamma和beta默认1和0,最终输出等于原样不变？
“这里t层某个神经元的x(k)不是指原始输入，就是说不是t-1层每个神经元的输出，而是t层这个神经元的线性激活x=WU+B，这里的U才是t-1层神经元的输出。”

这句话，当前层t的“特征”x(k)，不是前一层t-1的神经元的输出
疑问：x=WU+B能叫线性激活吗？不是线性变换，还没激活吗？
总之，BN要变的东西，是当前这一层乘以W并且加B以后，激活之前的数。所以更没局限在0到1之间了，测试数据不超纲，为什么得不到想要的结果？

先不说TF得不到想要结果，假设能得到，他说变换后，某个神经元的激活x（对应前边那句话的x）是均值0,方差1的正态分布。




## 关于为什么normalize能解决分布问题，还要经过scale和shift的变换
normalize变换之后，形成均值0,方差1的正态分布。（虽然目前在TF实现上还有点质疑）

先不说TF得不到想要结果，假设能得到，他说变换后，某个神经元的激活x（对应前边那句话的x）是均值0,方差1的正态分布。从训练和收敛的角度，到这就够了。

但是他说会导致网络表达能力下降，所以加了scale和shift，这是学到的，但是这是怎么学的？怎么促进参数往这个方向上靠？因为normalize表达能力差，所以预测表现差？所以为了让预测表现更好，就训练出了一定的scale和shift参数？


关于表达能力下降的解释：
BN会让激活值落在非线性函数的线性區内，具体的说，sigmoid的斜率为1的那个位置，非线性。
深度网络的本质是非线性变换和拟合复杂曲线，BN相当于遏止了向这一目的发展。
所以才引入了scale和shift，也相信这个对应的gamma和beta不会太大，相对于原本偏的离谱的分布，最后应该整体还是比较偏向于中心的分布，但是又不极端靠近中心——也就是sigmoid斜率为1的0点。
先normalize，又scale和shift，好像确实这一矛盾操作也有些争议。但是完全抵消也是一个理论上的状态，实际还是有些效果的，姑且算是一个tradeoff吧。实际效果好，并且有很多其他优点，这是一个实践先于理论的行业。。。。



## 关于这个BN如何被训练到

In [None]:
#关于tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_move_mean)
#这个是，把update_move_mean放到这个graph的更新操作中？但是哪有默认的更新操作？不是sess.run()对应一个操作吗？
#确实如此，利用dependency来运行，


#tf提供了接口，这个算是使用自定义参数的版本？tf的黑盒操作等于隐藏了这部分参数，实际上有参数。
#自定义了beta、gamma、moving_mean、moving_variance
#训练的时候moving_mean, moving_variance特殊处理，预测的时候直接赋值。
#关于这组参数，训练的时候要更新他们，要设置到UPDATE_OPS里，并且mean和variance也是从这个batch里得到的。
#推断的时候当然不能更新了，而且这个mean和variance也不是从batch里拿了，是从variable里取出来。
#而且他们的设定是trainable=False，默认是不训练的，就是利用UPDATE_OPS来训练。途径问题！！！！
#所以，更得搞明白，这个UPDATE_OPS到底从哪取出来用了？？？？？？？？？？？？？？？？
#可能这个网络根本就不全，不能真正用来训练。只有结构，没有loss之类的？

tf.nn.batch_normalization(x, mean, variance, beta, gamma, epsilon)
beta\gamma对应offset和scale，分别是平移和缩放。


#一个完整使用案例：
#tf.layers.batch_normalization()设置training参数，以便添加到指定collection？
#todo：如何查询collection的内容？？？？
'''
注意，是training不是trainable：
training参数是一个boolean，如果设置不对，就不能好好working了。
训练的时候返回output，利用当前batch的统计数据做normalize。
推断的时候返回output，利用moving移动统计数据？

trainable是决定是否添加到GraphKeys.TRAINABLE_VARIABLES
'''
#然后通过get_collection和control_dependencies来把这个操作绑定到训练中，也就是这个黑盒接口内部的参数被训练吧
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(loss)


## What's the differences between
tf.GraphKeys.TRAINABLE_VARIABLES and 
tf.GraphKeys.UPDATE_OPS in tensorflow?

tf.GraphKeys中，TRAINABLE_VARIABLES就是被optimizer训练的variable子集。
黑盒的BN层，参数就被自动加到了GraphKeys.UPDATE_OPS
用tf.get_collection()能找到想要的tensor
When use tensorflow.contrib.layers.batch_norm(), the parameter updates_collections default value is GraphKeys.UPDATE_OPS.
How can we understand those collections, and difference in them.
Besides, we can find more in ops.py.


TRAINABLE_VARIABLES是variables的集合！
是minimizing loss时候训练的，如果不指定trainable=False，一般variable都被自动加进去了。
不可训练的使用场景两步训练，fine-tune

UPDATE_OPS是ops的集合！不是variables
维护了每个训练步骤之前的操作列表
怎么加进来的？
根据定义，更新操作发生在损失最小化的常规培训流之外，因此通常只有在特殊情况下才会将操作添加到此集合中。例如，在执行批处理规范化时，您希望在每个培训步骤之前重新计算批平均值和差异，这就是它的实现方式。本文更详细地描述了使用tf.contrib.layers.batch_norm的批处理规范化机制。
http://ruishu.io/2016/12/27/batchnorm/

In [None]:
help(tf.nn.batch_normalization)

## 其他基本接口实验和验证

### TF共有三个BN接口
1.最底层的实现，需要自定义变量，就是本文方法，麻烦一些但是方便理解原理。
tf.nn.batch_normalization(
    x,
    mean,
    variance,
    offset,
    scale,
    variance_epsilon,
    name=None
)


2.layers封装下，顾名思义，以层的形式出现。具体更新方法，前边也提过，tf.GraphKeys.UPDATE_OPS，get_collection得到，然后设置train_op的dependency，这个依赖无论用哪套实现，都要有的，只不过第一种需要手动添加再get，第二种自动添加，直接get

tf.layers.batch_normalization( inputs, axis=-1, momentum=0.99, epsilon=0.001, 
center=True,
scale=True, 
beta_initializer=tf.zeros_initializer(), gamma_initializer=tf.ones_initializer(), moving_mean_initializer=tf.zeros_initializer(), moving_variance_initializer=tf.ones_initializer(), beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, training=False, trainable=True, name=None, reuse=None, renorm=False, renorm_clipping=None, renorm_momentum=0.99, fused=None, virtual_batch_size=None, adjustment=None )



3.contrib封装的layers，和2差不多，但是默认的scale是False，就是默认不缩放只偏移了，反正，可能偏移更重要些，避开那个线性区间。
注意看updates_collections，其实不是死的，默认放在了tf.GraphKeys.UPDATE_OPS这个key下，第二个接口是写死在了这个key下。

updates_collections是只有contrib才有的参数？
tf.contrib.layers.batch_norm( inputs, decay=0.999, 
center=True, 
scale=False, 
epsilon=0.001, activation_fn=None, param_initializers=None, param_regularizers=None, updates_collections=tf.GraphKeys.UPDATE_OPS,
is_training=True, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, batch_weights=None, fused=None, data_format=DATA_FORMAT_NHWC, zero_debias_moving_mean=False, scope=None, renorm=False, renorm_clipping=None, renorm_decay=0.99, adjustment=None )

## 和我的例子同样接口，另一套实现，差不太多，先不看了
https://blog.csdn.net/huitailangyz/article/details/85015611
import tensorflow as tf def batch_norm(x, name_scope, training, epsilon=1e-3, decay=0.99): """ Assume nd [batch, N1, N2, ..., Nm, Channel] tensor""" with tf.variable_scope(name_scope): size = x.get_shape().as_list()[-1] scale = tf.get_variable('scale', [size], initializer=tf.constant_initializer(0.1)) offset = tf.get_variable('offset', [size]) pop_mean = tf.get_variable('pop_mean', [size], initializer=tf.zeros_initializer(), trainable=False) pop_var = tf.get_variable('pop_var', [size], initializer=tf.ones_initializer(), trainable=False) batch_mean, batch_var = tf.nn.moments(x, list(range(len(x.get_shape())-1))) train_mean_op = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay)) train_var_op = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) def batch_statistics(): with tf.control_dependencies([train_mean_op, train_var_op]): return tf.nn.batch_normalization(x, batch_mean, batch_var, offset, scale, epsilon) def population_statistics(): return tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale, epsilon) return tf.cond(training, batch_statistics, population_statistics) is_traing = tf.placeholder(dtype=tf.bool) input = tf.ones([1, 2, 2, 3]) output = batch_norm(input, name_scope='batch_norm_nn', training=is_traing) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) saver = tf.train.Saver() saver.save(sess, "batch_norm_nn/Model")


### 扩展：一个control_dependencies的练习 

In [6]:
#单独的add操作，是x1+x2，输出3
x1_ = tf.Variable(1)
x2_ = tf.Variable(2)
update_op = tf.assign(x1_,10)
add = tf.add(x1_,x2_)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    res1_,  = sess.run([add])
    print('Add: ',res1_)
#加依赖y1自赋值10的update_op，被依赖，res2_是y1_+y2_，但是是y1_自赋值10后的，所以是10+2=12
y1_ = tf.Variable(1)
y2_ = tf.Variable(2)
update_op = tf.assign(y1_, 10)#重名

with tf.control_dependencies([update_op]):
    add_with_dependencies = tf.add(y1_, y2_)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    res1_, res2_ = sess.run([add, add_with_dependencies])
    print('Add: ',res1_)
    print('Add_with_dependencies: ',res2_)

Add:  3
Add:  3
Add_with_dependencies:  12


### 获取tf.GraphKeys.UPDATE_OPS打印BN的更新操作

默认放在了这个key下

只有是tf.contrib.layers.batch_norm接口才能设置updates_collections
可以改一下key试试,但是key不是自己随便能加的？只有那几个固定的，也不知道放到什么key测才合适。

key都是定死的？不能自定义？

In [4]:
is_training = tf.placeholder(dtype = tf.bool)
input = tf.ones([1,2,2,3])
output = tf.layers.batch_normalization(input, training = is_training)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
print(update_ops)#直接打印就可以看到了。

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.save(sess, "batch_norm_layer/Model")
    
is_training = tf.placeholder(dtype = tf.bool)
input = tf.ones([1,2,2,3])
output = tf.layers.batch_normalization(input, training = is_training)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
print('UPDATE_OPS: ',update_ops)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.save(sess, "batch_norm_layer/Model")


[<tf.Tensor 'batch_normalization/AssignMovingAvg:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'batch_normalization/AssignMovingAvg_1:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'batch_normalization_2/AssignMovingAvg:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'batch_normalization_2/AssignMovingAvg_1:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'batch_normalization_3/AssignMovingAvg:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'batch_normalization_3/AssignMovingAvg_1:0' shape=(3,) dtype=float32_ref>]
UPDATE_OPS:  [<tf.Tensor 'batch_normalization/AssignMovingAvg:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'batch_normalization/AssignMovingAvg_1:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'batch_normalization_2/AssignMovingAvg:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'batch_normalization_2/AssignMovingAvg_1:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'batch_normalization_3/AssignMovingAvg:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'batch_normalization_3/AssignMovingAvg_1:0' shap

In [2]:
is_training = tf.placeholder(dtype = tf.bool)
input = tf.ones([1,2,2,3])
output = tf.contrib.layers.batch_norm(input, is_training = is_training)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
print(update_ops)#直接打印就可以看到了。

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.save(sess, "batch_norm_layer/Model")


[<tf.Tensor 'BatchNorm/AssignMovingAvg:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'BatchNorm/AssignMovingAvg_1:0' shape=(3,) dtype=float32_ref>]


In [3]:
#本想把这个操作存在tf.GraphKeys.UPDATE_OPS之外的地方，没有合适操作
#强行把ops放到VARIABLES也不是个事儿，这个执行不了
'''
is_training = tf.placeholder(dtype = tf.bool)
input = tf.ones([1,2,2,3])
#output = tf.layers.batch_normalization(input, training = is_training, update_collections = tf.GraphKeys.my_ops)
output = tf.contrib.layers.batch_norm(input, is_training = is_training, updates_collections = tf.GraphKeys.TRAINABLE_VARIABLES)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
print('UPDATE_OPS: ',update_ops)

my_ops = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
print('my ops: ', my_ops)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.save(sess, "batch_norm_layer/Model")
'''
    '''
UPDATE_OPS:  [<tf.Tensor 'BatchNorm/AssignMovingAvg:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'BatchNorm/AssignMovingAvg_1:0' shape=(3,) dtype=float32_ref>]
my ops:  [<tf.Variable 'BatchNorm/beta:0' shape=(3,) dtype=float32_ref>, <tf.Variable 'BatchNorm_1/beta:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'BatchNorm_1/cond/Merge:0' shape=(3,) dtype=float32>, <tf.Tensor 'BatchNorm_1/cond/Merge_1:0' shape=(3,) dtype=float32>]
WARNING:tensorflow:Error encountered when serializing trainable_variables.
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'Tensor' object has no attribute 'to_proto'
'''

UPDATE_OPS:  [<tf.Tensor 'BatchNorm/AssignMovingAvg:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'BatchNorm/AssignMovingAvg_1:0' shape=(3,) dtype=float32_ref>]
my ops:  [<tf.Variable 'BatchNorm/beta:0' shape=(3,) dtype=float32_ref>, <tf.Variable 'BatchNorm_1/beta:0' shape=(3,) dtype=float32_ref>, <tf.Tensor 'BatchNorm_1/cond/Merge:0' shape=(3,) dtype=float32>, <tf.Tensor 'BatchNorm_1/cond/Merge_1:0' shape=(3,) dtype=float32>]
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'Tensor' object has no attribute 'to_proto'


In [13]:
help(tf.GraphKeys)

Help on class GraphKeys in module tensorflow.python.framework.ops:

class GraphKeys(builtins.object)
 |  Standard names to use for graph collections.
 |  
 |  The standard library uses various well-known names to collect and
 |  retrieve values associated with a graph. For example, the
 |  `tf.Optimizer` subclasses default to optimizing the variables
 |  collected under `tf.GraphKeys.TRAINABLE_VARIABLES` if none is
 |  specified, but it is also possible to pass an explicit list of
 |  variables.
 |  
 |  The following standard keys are defined:
 |  
 |  * `GLOBAL_VARIABLES`: the default collection of `Variable` objects, shared
 |    across distributed environment (model variables are subset of these). See
 |    @{tf.global_variables}
 |    for more details.
 |    Commonly, all `TRAINABLE_VARIABLES` variables will be in `MODEL_VARIABLES`,
 |    and all `MODEL_VARIABLES` variables will be in `GLOBAL_VARIABLES`.
 |  * `LOCAL_VARIABLES`: the subset of `Variable` objects that are local to e

### ndarray基本操作验证

In [29]:
#错误写法，不能保证数值变2,是list扩一倍
label1_ = np.array([
    [[[1.,2.,3.],[4.,5.,6.],[7.,8.,9.]],[[1.,2.,3.],[4.,5.,6.],[7.,8.,9.]]],
    [[[11.,12.,13.],[14.,15.,16.],[17.,18.,19.]],[[11.,12.,13.],[14.,15.,16.],[17.,18.,19.]]],
]*2, dtype=np.float)
print(label1_.shape)
#正确写法
label1_ = np.array([
    [[[1.,2.,3.],[4.,5.,6.],[7.,8.,9.]],[[1.,2.,3.],[4.,5.,6.],[7.,8.,9.]]],
    [[[11.,12.,13.],[14.,15.,16.],[17.,18.,19.]],[[11.,12.,13.],[14.,15.,16.],[17.,18.,19.]]],
], dtype=np.float)
label2_ = np.multiply(label1_,2)
print(label2_.shape)
print(label2_)
print(label1_)#不影响原array

(4, 2, 3, 3)
(2, 2, 3, 3)
[[[[  2.   4.   6.]
   [  8.  10.  12.]
   [ 14.  16.  18.]]

  [[  2.   4.   6.]
   [  8.  10.  12.]
   [ 14.  16.  18.]]]


 [[[ 22.  24.  26.]
   [ 28.  30.  32.]
   [ 34.  36.  38.]]

  [[ 22.  24.  26.]
   [ 28.  30.  32.]
   [ 34.  36.  38.]]]]
[[[[  1.   2.   3.]
   [  4.   5.   6.]
   [  7.   8.   9.]]

  [[  1.   2.   3.]
   [  4.   5.   6.]
   [  7.   8.   9.]]]


 [[[ 11.  12.  13.]
   [ 14.  15.  16.]
   [ 17.  18.  19.]]

  [[ 11.  12.  13.]
   [ 14.  15.  16.]
   [ 17.  18.  19.]]]]


## 其实他这个问题和我之前的一个误操作的最终运行情况类似——等于使用了初始值去跑inference——不过我那是根本就不训练直接跑，他是不加依赖关系所以没更新变量。
https://blog.csdn.net/huitailangyz/article/details/85015611
笔者最先使用时只是了解到了在tensorflow中tf.layers.batch_normalization这个函数，就在函数中直接将其使用，该函数中有一个参数为training，在训练阶段赋值True，在测试阶段赋值False。但是在训练完成后，出现了奇怪的现象时，在training赋值为True时，测试的正确率正常，但是training赋值为False时，测试正确率就很低。上述错误使用过程可以精简为下列代码段

is_traing = tf.placeholder(dtype=tf.bool)
input = tf.ones([1, 2, 2, 3])
output = tf.layers.batch_normalization(input, training=is_traing)
loss = ...
train_op = optimizer.minimize(loss)

with tf.Session() as sess:
	sess.run(tf.global_variables_initializer())
	sess.run(train_op)
    
作者贴出了tensorflow中BN的实现！！！！！关键点可能是这个_do_update没执行？
大概是吧，第一个函数中update_delta和assign_sub算是计算学习率和更新变量吧。
def _assign_moving_average(self, variable, value, momentum): 
    with ops.name_scope(None, 'AssignMovingAvg', [variable, value, momentum]) as scope:
        decay = ops.convert_to_tensor(1.0 - momentum, name='decay')
        if decay.dtype != variable.dtype.base_dtype:
            decay = math_ops.cast(decay, variable.dtype.base_dtype)
        update_delta = (variable - value) * decay
        return state_ops.assign_sub(variable, update_delta, name=scope) 
def _do_update(var, value): 
    return self._assign_moving_average(var, value, self.momentum)


    
可以看到其内部逻辑和我在介绍tf.nn.batch_normalization一节中展示的封装时所使用的方法类似。
如果不在使用时添加tf.control_dependencies函数，即在训练时(training=True)每批次时只会计算当批次的mean和var，并传递给tf.nn.batch_normalization进行归一化，由于mean_update和variance_update在计算图中并不在上述操作的依赖路径上，因为并不会主动完成，也就是说，在训练时mean_update和variance_update并不会被使用到，其值一直是初始值。因此在测试阶段(training=False)使用这两个作为mean和variance并进行归一化操作，这样就会出现错误。而如果使用tf.control_dependencies函数，会在训练阶段每次训练操作执行前被动地去执行mean_update和variance_update，因此moving_mean和moving_variance会被不断更新，在测试时使用该参数也就不会出现错误。


### moments接口，得到输入张量的平均值和方差。

In [50]:
x = tf.constant([[1.,2.,3.],[5.,4.,3.]])
#mean, variance = tf.nn.moments(x)#默认不是0，不允许不输入axes
mean, variance = tf.nn.moments(x, axes=[0])#axes=[0]就是样本间同特征的操作，1就是样本内部，BN使用0
mean, variance = tf.nn.moments(x, axes=[0],keep_dims=True)#保持维度并不影响计算过程，是最后输出的包装。
with tf.Session() as sess:
    print(sess.run(mean))
    print(sess.run(variance))

[[ 3.  3.  3.]]
[[ 4.  1.  0.]]


### 扩展：带channel的情况
之前没多想channel怎么处理，其实也不复杂，不像CNN那种filter跨通道合并，每个channel都是独立算的，其实和单通道是一样的。
已经实现了例2：所谓每个channel独立计算，至少在这个例子和我那个模板下，一个channel只有一个mean和beta。但是也许可以变形扩展吧。


for i in range(channel):
    x = input[:,:,:,i]
    mean = mean(x)
    variance = variance(x)
    x = (x - mean) / sqrt(variance)
    x = scale * x + offset
    input[:,:,:,i] = x