# Batch Normalization을 하는 코드
출처 - https://github.com/hunkim/DeepLearningZeroToAll/blob/master/lab-10-6-mnist_nn_batchnorm.ipynb 


* 1. tf.layers.batch_normalization(net, training=self.mode)

* 2. train 과정에서는 moving_mean과 moving_var가 직접 호출되지 않으므로, train과 별도로 moving_mean과 moving_var를 갱신하는 op를 실행시켜야 한다. 이 op들은 update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)로 얻을 수 있으니, 이를 넘겨받아 tf.control_dependencies로 train_op에 연결해 두면 sess.run에서 함께 실행된다.



    # tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=None) => BN
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=name)
    with tf.control_dependencies(update_ops):    # control dependency 추가
        self.train_op = optimizer(lr).minimize(self.loss)


In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline

# Read the MNIST dataset through the TF tutorial helper, using MNIST_data/ as
# the data directory; one_hot=True requests one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [2]:
# Inspect the training image array: (number of examples, features per image).
mnist.train.images.shape

(55000, 784)

In [3]:
class Model(object):
    """Fully connected softmax classifier with optional batch normalization.

    The whole graph is built inside ``tf.variable_scope(name)`` so that several
    models can live side by side in the same TensorFlow 1.x graph.

    Args:
        name: Variable-scope name under which the model is built.
        input_dim: Number of input features per example.
        output_dim: Number of target classes (labels are fed one-hot encoded).
        hidden_dims: Sizes of the hidden dense layers, in order. Only iterated,
            never mutated; the default is a tuple to avoid a shared mutable
            default argument.
        use_batchnorm: If True, insert batch normalization between each hidden
            dense layer and its activation.
        activation_fn: Activation applied at the end of every hidden layer.
        optimizer: Optimizer class; instantiated as ``optimizer(lr)``.
        lr: Learning rate handed to the optimizer.

    Attributes:
        X: float32 placeholder of shape ``[None, input_dim]``.
        y: float32 placeholder of shape ``[None, output_dim]``.
        mode: bool placeholder; fed as the ``training`` flag of batch norm.
        loss: Mean softmax cross-entropy over the batch.
        train_op: Optimizer step, forced to run after the batch-norm update ops.
        accuracy: Fraction of examples whose arg-max prediction matches the label.
    """

    def __init__(self, name, input_dim, output_dim, hidden_dims=(32, 32), use_batchnorm=True, activation_fn=tf.nn.relu, optimizer=tf.train.AdamOptimizer, lr=0.01):
        with tf.variable_scope(name) as scope:
            # Placeholders are defined
            self.X = tf.placeholder(tf.float32, [None, input_dim], name='X')
            self.y = tf.placeholder(tf.float32, [None, output_dim], name='y')
            self.mode = tf.placeholder(tf.bool, name='train_mode')

            # Loop over hidden layers: dense -> (batch norm) -> activation
            net = self.X
            for i, h_dim in enumerate(hidden_dims):
                with tf.variable_scope('layer{}'.format(i)):
                    net = tf.layers.dense(net, h_dim)

                    if use_batchnorm:
                        net = tf.layers.batch_normalization(net, training=self.mode)

                    net = activation_fn(net)

            # Attach the output layer. The activations here should already be
            # 2-D, so the flatten is expected to be a no-op; it is kept so the
            # graph matches the original.
            net = tf.contrib.layers.flatten(net)
            net = tf.layers.dense(net, output_dim)

            # The non-v2 op is deprecated. Labels come from a placeholder, so
            # letting gradients flow into them (the v2 behaviour) changes nothing.
            self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=net, labels=self.y)
            self.loss = tf.reduce_mean(self.loss, name='loss')

            # When using batch normalization layers it is necessary to run the
            # moving mean/variance update ops explicitly, because they are not
            # part of the training op's dependencies by default.
            #
            # tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=...) filters with
            # re.match on the op name. Filtering on this scope's own name scope
            # (which carries the trailing '/') picks up only this model's ops:
            # it no longer matches a sibling scope that merely shares a prefix
            # (e.g. 'bn' vs 'bn_big') and still works when the model is built
            # inside an outer scope.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=scope.original_name_scope)
            with tf.control_dependencies(update_ops):    # add the control dependency
                self.train_op = optimizer(lr).minimize(self.loss)

            # Accuracy etc
            softmax = tf.nn.softmax(net, name='softmax')
            self.accuracy = tf.equal(tf.argmax(softmax, 1), tf.argmax(self.y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.accuracy, tf.float32))

In [4]:
class Solver:
    """Runs training steps and evaluation for one model inside a session.

    The model is expected to expose ``X``, ``y`` and ``mode`` (used as feed
    keys) plus ``train_op``, ``loss`` and ``accuracy`` (used as fetches).
    """

    def __init__(self, sess, model):
        self.sess = sess
        self.model = model

    def _feed(self, X, y, training):
        """Build the feed dict for one batch with the given train-mode flag."""
        net = self.model
        return {net.X: X, net.y: y, net.mode: training}

    def train(self, X, y):
        """Run one optimisation step on (X, y) in training mode.

        Returns whatever the session returns for ``[train_op, loss]``.
        """
        fetches = [self.model.train_op, self.model.loss]
        return self.sess.run(fetches, feed_dict=self._feed(X, y, True))

    def evaluate(self, X, y, batch_size=None):
        """Compute loss and accuracy on (X, y) in inference mode.

        With a falsy ``batch_size`` the whole set is evaluated in a single
        session call and that call's result is returned as is. Otherwise the
        data is walked in chunks of ``batch_size`` rows and the per-chunk
        metrics are averaged, weighted by chunk size, giving a
        ``(loss, accuracy)`` tuple.
        """
        fetches = [self.model.loss, self.model.accuracy]

        # Single-shot evaluation when no batch size was requested.
        if not batch_size:
            return self.sess.run(fetches, feed_dict=self._feed(X, y, False))

        N = X.shape[0]
        loss_sum = 0
        acc_sum = 0

        for start in range(0, N, batch_size):
            stop = start + batch_size
            X_batch, y_batch = X[start:stop], y[start:stop]

            step_loss, step_acc = self.sess.run(
                fetches, feed_dict=self._feed(X_batch, y_batch, False)
            )

            # Weight by the real chunk size: the last chunk may be shorter.
            rows = X_batch.shape[0]
            loss_sum += step_loss * rows
            acc_sum += step_acc * rows

        return loss_sum / N, acc_sum / N

In [5]:
# Take the problem dimensions from the loaded data instead of hard-coding
# 784 / 10 / 55000, so they cannot drift from the dataset actually in use.
N, input_dim = mnist.train.images.shape      # (number of training examples, features)
output_dim = mnist.train.labels.shape[1]     # width of the one-hot label vectors

# Start from an empty graph so re-running this cell does not collide with
# variables created by a previous run.
tf.reset_default_graph()
sess = tf.InteractiveSession()

# We create two models: one with the batch norm and other without
bn = Model('batchnorm', input_dim, output_dim, use_batchnorm=True)
nn = Model('no_norm', input_dim, output_dim, use_batchnorm=False)

# We create two solvers: to train both models at the same time for comparison
# Usually we only need one solver class
bn_solver = Solver(sess, bn)
nn_solver = Solver(sess, nn)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [6]:
# Training schedule: number of passes over the data and mini-batch size.
epoch_n, batch_size = 10, 32

# Per-epoch metric history; the training loop appends one
# [batchnorm, no-batchnorm] pair to each list per epoch.
train_losses, train_accs = [], []
valid_losses, valid_accs = [], []

In [7]:
# Initialise every variable of both models before the first training step.
init = tf.global_variables_initializer()
sess.run(init)

# Train both models side by side on the same mini-batches, then record their
# train and validation metrics once per epoch.
for epoch in range(epoch_n):
    # One epoch = N // batch_size mini-batch steps.
    for _ in range(N//batch_size):
        X_batch, y_batch= mnist.train.next_batch(batch_size)
        
        # Both models get the identical batch; the per-step losses are not used further.
        _, bn_loss = bn_solver.train(X_batch, y_batch)
        _, nn_loss = nn_solver.train(X_batch, y_batch)
        
    # Training-set metrics, evaluated in chunks of batch_size rows.
    b_loss, b_acc = bn_solver.evaluate(mnist.train.images, mnist.train.labels, batch_size)
    n_loss, n_acc = nn_solver.evaluate(mnist.train.images, mnist.train.labels, batch_size)
    
    # Save train losses/acc as [batchnorm, no-batchnorm] pairs.
    train_losses.append([b_loss, n_loss])
    train_accs.append([b_acc, n_acc])
    print(f'[Epoch {epoch}-TRAIN] Batchnorm Loss(Acc): {b_loss:.5f}({b_acc:.2%}) vs No Batchnorm Loss(Acc): {n_loss:.5f}({n_acc:.2%})')
    
    # Validation-set metrics, evaluated in a single pass (no batch_size given).
    b_loss, b_acc = bn_solver.evaluate(mnist.validation.images, mnist.validation.labels)
    n_loss, n_acc = nn_solver.evaluate(mnist.validation.images, mnist.validation.labels)
    
    # Save valid losses/acc
    valid_losses.append([b_loss, n_loss])
    valid_accs.append([b_acc, n_acc])
    print(f'[Epoch {epoch}-VALID] Batchnorm Loss(Acc): {b_loss:.5f}({b_acc:.2%}) vs No Batchnorm Loss(Acc): {n_loss:.5f}({n_acc:.2%})\n')
   
    

[Epoch 0-TRAIN] Batchnorm Loss(Acc): 0.13693(95.81%) vs No Batchnorm Loss(Acc): 0.24393(92.91%)
[Epoch 0-VALID] Batchnorm Loss(Acc): 0.14058(95.78%) vs No Batchnorm Loss(Acc): 0.24739(92.84%)

[Epoch 1-TRAIN] Batchnorm Loss(Acc): 0.12758(95.82%) vs No Batchnorm Loss(Acc): 0.16997(95.00%)
[Epoch 1-VALID] Batchnorm Loss(Acc): 0.14236(96.00%) vs No Batchnorm Loss(Acc): 0.17680(94.62%)

[Epoch 2-TRAIN] Batchnorm Loss(Acc): 0.08615(97.37%) vs No Batchnorm Loss(Acc): 0.18812(94.51%)
[Epoch 2-VALID] Batchnorm Loss(Acc): 0.11321(96.70%) vs No Batchnorm Loss(Acc): 0.19073(94.68%)

[Epoch 3-TRAIN] Batchnorm Loss(Acc): 0.07394(97.64%) vs No Batchnorm Loss(Acc): 0.16787(94.96%)
[Epoch 3-VALID] Batchnorm Loss(Acc): 0.10075(97.16%) vs No Batchnorm Loss(Acc): 0.19988(94.46%)

[Epoch 4-TRAIN] Batchnorm Loss(Acc): 0.07484(97.65%) vs No Batchnorm Loss(Acc): 0.18691(94.93%)
[Epoch 4-VALID] Batchnorm Loss(Acc): 0.10847(97.06%) vs No Batchnorm Loss(Acc): 0.23278(94.46%)

[Epoch 5-TRAIN] Batchnorm Loss(Acc)