In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [2]:
mnist.train.images.shape

(55000, 784)

In [3]:
class Model(object):
    def __init__(self, name, input_dim, output_dim, hidden_dims=[32, 32], use_batchnorm=True, activation_fn=tf.nn.relu, optimizer=tf.train.AdamOptimizer, lr=0.01):
      
            with tf.variable_scope(name):
                # Placeholders are defined
                self.X = tf.placeholder(tf.float32, [None, input_dim], name='X')
                self.y = tf.placeholder(tf.float32, [None, output_dim], name='y')
                self.mode = tf.placeholder(tf.bool, name='train_mode')   


                # Loop over hidden layers
                net = self.X
                for i, h_dim in enumerate(hidden_dims):
                    with tf.variable_scope('layer{}'.format(i)):
                        net = tf.layers.dense(net, h_dim)

                        if use_batchnorm:
                            net = tf.layers.batch_normalization(net, training=self.mode)

                        net = activation_fn(net)

                # Attach fully connected layers
                net = tf.contrib.layers.flatten(net)
                net = tf.layers.dense(net, output_dim)

                self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=self.y)
                self.loss = tf.reduce_mean(self.loss, name='loss')    

                # When using the batchnormalization layers,
                # it is necessary to manually add the update operations
                # because the moving averages are not included in the graph    

                # tf.get_collection(tf.GraphKeys.UPDATA_OPS, scope=none) => BN 
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=name)
                with tf.control_dependencies(update_ops):    # control dependency 추가                 
                    self.train_op = optimizer(lr).minimize(self.loss)

                # Accuracy etc 
                softmax = tf.nn.softmax(net, name='softmax')
                self.accuracy = tf.equal(tf.argmax(softmax, 1), tf.argmax(self.y, 1))
                self.accuracy = tf.reduce_mean(tf.cast(self.accuracy, tf.float32))

In [4]:
class Solver:
    
    def __init__(self, sess, model):
        self.model = model
        self.sess = sess
        
    def train(self, X, y):
        feed = {
            self.model.X: X,
            self.model.y: y,
            self.model.mode: True
        }
        train_op = self.model.train_op
        loss = self.model.loss
        
        return self.sess.run([train_op, loss], feed_dict=feed)
    
    def evaluate(self, X, y, batch_size=None):
        if batch_size:
            N = X.shape[0]
            
            total_loss = 0
            total_acc = 0
            
            for i in range(0, N, batch_size):
                X_batch = X[i:i + batch_size]
                y_batch = y[i:i + batch_size]
                
                feed = {
                    self.model.X: X_batch,
                    self.model.y: y_batch,
                    self.model.mode: False
                }
                
                loss = self.model.loss
                accuracy = self.model.accuracy
                
                step_loss, step_acc = self.sess.run([loss, accuracy], feed_dict=feed)
                
                total_loss += step_loss * X_batch.shape[0] # 배치 크기 * 구간 loss 
                total_acc += step_acc * X_batch.shape[0] # 배치 크기 * 구간 정확도
            
            total_loss /= N
            total_acc /= N
            
            return total_loss, total_acc
    
        else:
            feed = {
                self.model.X: X,
                self.model.y: y,
                self.model.mode: False
            }
            
            loss = self.model.loss            
            accuracy = self.model.accuracy

            return self.sess.run([loss, accuracy], feed_dict=feed)

In [5]:
input_dim = 784
output_dim = 10
N = 55000

tf.reset_default_graph()
sess = tf.InteractiveSession()

# We create two models: one with the batch norm and other without
bn = Model('batchnorm', input_dim, output_dim, use_batchnorm=True)
nn = Model('no_norm', input_dim, output_dim, use_batchnorm=False)

# We create two solvers: to train both models at the same time for comparison
# Usually we only need one solver class
bn_solver = Solver(sess, bn)
nn_solver = Solver(sess, nn)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [6]:
epoch_n = 10
batch_size = 32

train_losses = []
train_accs = []

valid_losses = []
valid_accs = []

In [7]:
init = tf.global_variables_initializer()
sess.run(init)

for epoch in range(epoch_n):
    for _ in range(N//batch_size):
        X_batch, y_batch= mnist.train.next_batch(batch_size)
        
        _, bn_loss = bn_solver.train(X_batch, y_batch)
        _, nn_loss = nn_solver.train(X_batch, y_batch)
        
    b_loss, b_acc = bn_solver.evaluate(mnist.train.images, mnist.train.labels, batch_size)
    n_loss, n_acc = nn_solver.evaluate(mnist.train.images, mnist.train.labels, batch_size)
    
    train_losses.append([b_loss, n_loss])
    train_accs.append([b_acc, n_acc])
    print(f'[Epoch {epoch}-TRAIN] Batchnorm Loss(Acc): {b_loss:.5f}({b_acc:.2%}) vs No Batchnorm Loss(Acc): {n_loss:.5f}({n_acc:.2%})')
    
    b_loss, b_acc = bn_solver.evaluate(mnist.validation.images, mnist.validation.labels)
    n_loss, n_acc = nn_solver.evaluate(mnist.validation.images, mnist.validation.labels)
    
    # Save valid losses/acc
    valid_losses.append([b_loss, n_loss])
    valid_accs.append([b_acc, n_acc])
    print(f'[Epoch {epoch}-VALID] Batchnorm Loss(Acc): {b_loss:.5f}({b_acc:.2%}) vs No Batchnorm Loss(Acc): {n_loss:.5f}({n_acc:.2%})\n')
   
    

InvalidArgumentError: Cannot assign a device for operation 'batchnorm/gradients/batchnorm/softmax_cross_entropy_with_logits_sg_grad/LogSoftmax': Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.
Registered kernels:
  device='CPU'; T in [DT_HALF]
  device='CPU'; T in [DT_FLOAT]
  device='CPU'; T in [DT_DOUBLE]

	 [[Node: batchnorm/gradients/batchnorm/softmax_cross_entropy_with_logits_sg_grad/LogSoftmax = LogSoftmax[T=DT_FLOAT, _device="/device:GPU:0"](batchnorm/softmax_cross_entropy_with_logits_sg/Reshape, ^batchnorm/layer0/batch_normalization/cond_2/Merge, ^batchnorm/layer0/batch_normalization/cond_3/Merge, ^batchnorm/layer1/batch_normalization/cond_2/Merge, ^batchnorm/layer1/batch_normalization/cond_3/Merge)]]

Caused by op 'batchnorm/gradients/batchnorm/softmax_cross_entropy_with_logits_sg_grad/LogSoftmax', defined at:
  File "C:\Users\gunooknam\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\gunooknam\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
    self.io_loop.start()
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\gunooknam\Anaconda3\lib\asyncio\base_events.py", line 422, in run_forever
    self._run_once()
  File "C:\Users\gunooknam\Anaconda3\lib\asyncio\base_events.py", line 1432, in _run_once
    handle._run()
  File "C:\Users\gunooknam\Anaconda3\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tornado\ioloop.py", line 759, in _run_callback
    ret = callback()
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 536, in <lambda>
    self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-87aaed0bf2bc>", line 9, in <module>
    bn = Model('batchnorm', input_dim, output_dim, use_batchnorm=True)
  File "<ipython-input-3-50932d0d36ae>", line 38, in __init__
    self.train_op = optimizer(lr).minimize(self.loss)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\training\optimizer.py", line 399, in minimize
    grad_loss=grad_loss)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\training\optimizer.py", line 511, in compute_gradients
    colocate_gradients_with_ops=colocate_gradients_with_ops)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 532, in gradients
    gate_gradients, aggregation_method, stop_gradients)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 701, in _GradientsHelper
    lambda: grad_fn(op, *out_grads))
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 396, in _MaybeCompile
    return grad_fn()  # Exit early
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 701, in <lambda>
    lambda: grad_fn(op, *out_grads))
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\ops\nn_grad.py", line 481, in _SoftmaxCrossEntropyWithLogitsGrad
    return grad, _BroadcastMul(grad_loss, -nn_ops.log_softmax(logits))
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 432, in new_func
    return func(*args, **kwargs)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 1768, in log_softmax
    return _softmax(logits, gen_nn_ops.log_softmax, axis, name)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 1673, in _softmax
    return compute_op(logits, name=name)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 4992, in log_softmax
    "LogSoftmax", logits=logits, name=name)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3414, in create_op
    op_def=op_def)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1740, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

...which was originally created as op 'batchnorm/softmax_cross_entropy_with_logits_sg', defined at:
  File "C:\Users\gunooknam\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
[elided 24 identical lines from previous traceback]
  File "<ipython-input-5-87aaed0bf2bc>", line 9, in <module>
    bn = Model('batchnorm', input_dim, output_dim, use_batchnorm=True)
  File "<ipython-input-3-50932d0d36ae>", line 28, in __init__
    self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=self.y)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 250, in new_func
    return func(*args, **kwargs)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 1968, in softmax_cross_entropy_with_logits
    labels=labels, logits=logits, dim=dim, name=name)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 1879, in softmax_cross_entropy_with_logits_v2
    precise_logits, labels, name=name)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 7738, in softmax_cross_entropy_with_logits
    name=name)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3414, in create_op
    op_def=op_def)
  File "C:\Users\gunooknam\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1740, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Cannot assign a device for operation 'batchnorm/gradients/batchnorm/softmax_cross_entropy_with_logits_sg_grad/LogSoftmax': Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.
Registered kernels:
  device='CPU'; T in [DT_HALF]
  device='CPU'; T in [DT_FLOAT]
  device='CPU'; T in [DT_DOUBLE]

	 [[Node: batchnorm/gradients/batchnorm/softmax_cross_entropy_with_logits_sg_grad/LogSoftmax = LogSoftmax[T=DT_FLOAT, _device="/device:GPU:0"](batchnorm/softmax_cross_entropy_with_logits_sg/Reshape, ^batchnorm/layer0/batch_normalization/cond_2/Merge, ^batchnorm/layer0/batch_normalization/cond_3/Merge, ^batchnorm/layer1/batch_normalization/cond_2/Merge, ^batchnorm/layer1/batch_normalization/cond_3/Merge)]]
