In [1]:
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""A binary to train CIFAR-10 using a single GPU.
Accuracy:
cifar10_train.py achieves ~86% accuracy after 100K steps (256 epochs of
data) as judged by cifar10_eval.py.
Speed: With batch_size 128.
System        | Step Time (sec/batch)  |     Accuracy
------------------------------------------------------------------
1 Tesla K20m  | 0.35-0.60              | ~86% at 60K steps  (5 hours)
1 Tesla K40m  | 0.25-0.35              | ~86% at 100K steps (4 hours)
Usage:
Please see the tutorial and website for how to download the CIFAR-10
data set, compile the program and train the model.
http://tensorflow.org/tutorials/deep_cnn/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datetime import datetime
import time

import tensorflow as tf

import cifar10

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string('train_dir', '/tmp/cifar10_train',
                           """Directory where to write event logs """
                           """and checkpoint.""")
tf.app.flags.DEFINE_integer('max_steps', 1000000,
                            """Number of batches to run.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
                            """Whether to log device placement.""")
tf.app.flags.DEFINE_integer('log_frequency', 10,
                            """How often to log results to the console.""")


def train():
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    global_step = tf.train.get_or_create_global_step()

    # Get images and labels for CIFAR-10.
    # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
    # GPU and resulting in a slow down.
    with tf.device('/cpu:0'):
      images, labels = cifar10.distorted_inputs()

    # Build a Graph that computes the logits predictions from the
    # inference model.b
    logits = cifar10.inference(images)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
      """Logs loss and runtime."""

      def begin(self):
        self._step = -1
        self._start_time = time.time()

      def before_run(self, run_context):
        self._step += 1
        return tf.train.SessionRunArgs(loss)  # Asks for loss value.

      def after_run(self, run_context, run_values):
        if self._step % FLAGS.log_frequency == 0:
          current_time = time.time()
          duration = current_time - self._start_time
          self._start_time = current_time

          loss_value = run_values.results
          print(FLAGS.batch_size)
          examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
          sec_per_batch = float(duration / FLAGS.log_frequency)

          format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
          print (format_str % (datetime.now(), self._step, loss_value,
                               examples_per_sec, sec_per_batch))

    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=FLAGS.train_dir,
        hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
               tf.train.NanTensorHook(loss),
               _LoggerHook()],
        config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)) as mon_sess:
      while not mon_sess.should_stop():
        mon_sess.run(train_op)


def main(argv=None):  # pylint: disable=unused-argument
  cifar10.maybe_download_and_extract()
  if tf.gfile.Exists(FLAGS.train_dir):
    tf.gfile.DeleteRecursively(FLAGS.train_dir)
  tf.gfile.MakeDirs(FLAGS.train_dir)
  train()


if __name__ == '__main__':
  tf.app.run()

(128, 32, 3)


<matplotlib.figure.Figure at 0x231cc2bfb70>

Filling queue with 20000 CIFAR images before starting to train. This will take a few minutes.INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.CancelledError'>, Enqueue operation was cancelled
	 [[Node: input_producer/input_producer_EnqueueMany = QueueEnqueueManyV2[Tcomponents=[DT_STRING], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer, input_producer/RandomShuffle)]]

(128, 4, 32, 32, 3)
conv1
	(128, 4, 32, 32, 3) --> (128, 4, 32, 32, 64)
pool1
	(128, 4, 32, 32, 64) --> (128, 4, 16, 16, 64)


  from ._conv import register_converters as _register_converters


norm1
	(128, 4, 16, 16, 64) --> (128, 4, 16, 16, 64)
conv2
	(128, 4, 16, 16, 64) --> (128, 4, 16, 16, 64)
norm2
	(128, 4, 16, 16, 64) --> (128, 4, 16, 16, 64)
pool2
	(128, 4, 16, 16, 64) --> (128, 4, 8, 8, 64)
conv2
	(128, 4, 8, 8, 64) --> (128, 4, 8, 8, 64)
norm3
	(128, 4, 8, 8, 64) --> (128, 4, 8, 8, 64)
pool3
	(128, 4, 8, 8, 64) --> (128, 4, 4, 4, 64)
conv4
	(128, 4, 4, 4, 64) --> (128, 4, 4, 4, 64)
norm3
	(128, 4, 4, 4, 64) --> (128, 4, 4, 4, 64)
pool4
	(128, 4, 4, 4, 64) --> (128, 4, 2, 2, 64)
conv5
	(128, 4, 2, 2, 64) --> (128, 4, 2, 2, 64)
norm5
	(128, 4, 2, 2, 64) --> (128, 4, 4, 4, 64)
pool5
	(128, 4, 4, 4, 64) --> (128, 4, 2, 2, 64)
INFO:tensorflow:Summary name conv1/weight_loss (raw) is illegal; using conv1/weight_loss__raw_ instead.
INFO:tensorflow:Summary name conv2/weight_loss (raw) is illegal; using conv2/weight_loss__raw_ instead.
INFO:tensorflow:Summary name conv3/weight_loss (raw) is illegal; using conv3/weight_loss__raw_ instead.
INFO:tensorflow:Summary name conv4/we

128
2018-06-02 22:30:03.159963: step 670, loss = 1.75 (430.0 examples/sec; 0.298 sec/batch)
128
2018-06-02 22:30:06.138998: step 680, loss = 1.49 (429.7 examples/sec; 0.298 sec/batch)
128
2018-06-02 22:30:09.120025: step 690, loss = 1.82 (429.2 examples/sec; 0.298 sec/batch)
INFO:tensorflow:global_step/sec: 3.20175
128
2018-06-02 22:30:13.444924: step 700, loss = 1.72 (296.7 examples/sec; 0.431 sec/batch)
128
2018-06-02 22:30:16.439915: step 710, loss = 1.76 (425.8 examples/sec; 0.301 sec/batch)
128
2018-06-02 22:30:19.427968: step 720, loss = 1.61 (428.4 examples/sec; 0.299 sec/batch)
128
2018-06-02 22:30:22.443904: step 730, loss = 1.77 (424.4 examples/sec; 0.302 sec/batch)
128
2018-06-02 22:30:25.441856: step 740, loss = 1.56 (427.0 examples/sec; 0.300 sec/batch)
128
2018-06-02 22:30:28.441834: step 750, loss = 1.58 (426.7 examples/sec; 0.300 sec/batch)
128
2018-06-02 22:30:31.463579: step 760, loss = 1.43 (423.6 examples/sec; 0.302 sec/batch)
128
2018-06-02 22:30:34.463497: step 77

128
2018-06-02 22:34:31.229408: step 1520, loss = 1.05 (424.4 examples/sec; 0.302 sec/batch)
128
2018-06-02 22:34:34.254321: step 1530, loss = 1.08 (423.0 examples/sec; 0.303 sec/batch)
128
2018-06-02 22:34:37.241553: step 1540, loss = 0.98 (428.5 examples/sec; 0.299 sec/batch)
128
2018-06-02 22:34:40.222301: step 1550, loss = 1.19 (429.6 examples/sec; 0.298 sec/batch)
128
2018-06-02 22:34:43.204446: step 1560, loss = 1.23 (429.1 examples/sec; 0.298 sec/batch)
128
2018-06-02 22:34:46.197484: step 1570, loss = 1.10 (427.8 examples/sec; 0.299 sec/batch)
128
2018-06-02 22:34:49.187495: step 1580, loss = 1.03 (428.0 examples/sec; 0.299 sec/batch)
128
2018-06-02 22:34:52.189763: step 1590, loss = 1.05 (426.5 examples/sec; 0.300 sec/batch)
INFO:tensorflow:global_step/sec: 3.20767
128
2018-06-02 22:34:56.345925: step 1600, loss = 1.06 (308.3 examples/sec; 0.415 sec/batch)
128
2018-06-02 22:34:59.338869: step 1610, loss = 1.00 (427.0 examples/sec; 0.300 sec/batch)
128
2018-06-02 22:35:02.33933

128
2018-06-02 22:38:54.002234: step 2360, loss = 1.21 (427.4 examples/sec; 0.299 sec/batch)
128
2018-06-02 22:38:56.992108: step 2370, loss = 0.83 (428.1 examples/sec; 0.299 sec/batch)
128
2018-06-02 22:38:59.977477: step 2380, loss = 0.92 (428.9 examples/sec; 0.298 sec/batch)
128
2018-06-02 22:39:02.953628: step 2390, loss = 0.77 (430.1 examples/sec; 0.298 sec/batch)
INFO:tensorflow:global_step/sec: 3.21212
128
2018-06-02 22:39:07.204426: step 2400, loss = 0.78 (302.0 examples/sec; 0.424 sec/batch)
128
2018-06-02 22:39:10.185588: step 2410, loss = 0.78 (427.4 examples/sec; 0.300 sec/batch)
128
2018-06-02 22:39:13.158135: step 2420, loss = 0.82 (430.6 examples/sec; 0.297 sec/batch)
128
2018-06-02 22:39:16.146629: step 2430, loss = 0.80 (428.3 examples/sec; 0.299 sec/batch)
128
2018-06-02 22:39:19.121903: step 2440, loss = 0.67 (430.2 examples/sec; 0.298 sec/batch)
128
2018-06-02 22:39:22.140534: step 2450, loss = 0.68 (424.2 examples/sec; 0.302 sec/batch)
128
2018-06-02 22:39:25.15727

128
2018-06-02 22:43:19.734210: step 3210, loss = 0.69 (427.3 examples/sec; 0.300 sec/batch)
128
2018-06-02 22:43:22.726405: step 3220, loss = 0.56 (427.9 examples/sec; 0.299 sec/batch)
128
2018-06-02 22:43:25.727794: step 3230, loss = 0.74 (426.3 examples/sec; 0.300 sec/batch)
128
2018-06-02 22:43:28.745603: step 3240, loss = 0.70 (424.3 examples/sec; 0.302 sec/batch)
128
2018-06-02 22:43:31.753871: step 3250, loss = 0.73 (425.4 examples/sec; 0.301 sec/batch)
128
2018-06-02 22:43:34.761188: step 3260, loss = 0.69 (425.6 examples/sec; 0.301 sec/batch)
128
2018-06-02 22:43:37.773987: step 3270, loss = 0.59 (425.0 examples/sec; 0.301 sec/batch)
128
2018-06-02 22:43:40.771454: step 3280, loss = 0.55 (426.9 examples/sec; 0.300 sec/batch)
128
2018-06-02 22:43:43.766821: step 3290, loss = 0.68 (427.3 examples/sec; 0.300 sec/batch)
INFO:tensorflow:global_step/sec: 2.90267
128
2018-06-02 22:43:51.233536: step 3300, loss = 0.63 (171.7 examples/sec; 0.745 sec/batch)
128
2018-06-02 22:43:54.23900

128
2018-06-02 22:47:48.258480: step 4050, loss = 0.54 (423.2 examples/sec; 0.302 sec/batch)
128
2018-06-02 22:47:51.239772: step 4060, loss = 0.66 (429.2 examples/sec; 0.298 sec/batch)
128
2018-06-02 22:47:54.229691: step 4070, loss = 0.65 (428.1 examples/sec; 0.299 sec/batch)
128
2018-06-02 22:47:57.227620: step 4080, loss = 0.43 (427.0 examples/sec; 0.300 sec/batch)
128
2018-06-02 22:48:00.200785: step 4090, loss = 0.67 (430.5 examples/sec; 0.297 sec/batch)
INFO:tensorflow:global_step/sec: 3.21394
128
2018-06-02 22:48:04.336265: step 4100, loss = 0.68 (309.5 examples/sec; 0.414 sec/batch)
128
2018-06-02 22:48:07.330833: step 4110, loss = 0.46 (427.4 examples/sec; 0.299 sec/batch)
128
2018-06-02 22:48:10.352144: step 4120, loss = 0.65 (423.7 examples/sec; 0.302 sec/batch)
128
2018-06-02 22:48:13.493749: step 4130, loss = 0.69 (408.0 examples/sec; 0.314 sec/batch)
128
2018-06-02 22:48:16.660723: step 4140, loss = 0.62 (403.8 examples/sec; 0.317 sec/batch)
128
2018-06-02 22:48:29.65135

INFO:tensorflow:global_step/sec: 3.21944
128
2018-06-02 22:52:23.845312: step 4900, loss = 0.39 (307.8 examples/sec; 0.416 sec/batch)
128
2018-06-02 22:52:26.816368: step 4910, loss = 0.65 (429.8 examples/sec; 0.298 sec/batch)
128
2018-06-02 22:52:29.788421: step 4920, loss = 0.48 (430.7 examples/sec; 0.297 sec/batch)
128
2018-06-02 22:52:32.771482: step 4930, loss = 0.50 (429.1 examples/sec; 0.298 sec/batch)
128
2018-06-02 22:52:35.765457: step 4940, loss = 0.46 (427.7 examples/sec; 0.299 sec/batch)
128
2018-06-02 22:52:38.741480: step 4950, loss = 0.45 (430.0 examples/sec; 0.298 sec/batch)
128
2018-06-02 22:52:41.739618: step 4960, loss = 0.59 (427.1 examples/sec; 0.300 sec/batch)
128
2018-06-02 22:52:44.756549: step 4970, loss = 0.46 (424.1 examples/sec; 0.302 sec/batch)
128
2018-06-02 22:52:47.774787: step 4980, loss = 0.42 (424.1 examples/sec; 0.302 sec/batch)
128
2018-06-02 22:52:50.798886: step 4990, loss = 0.43 (423.3 examples/sec; 0.302 sec/batch)
INFO:tensorflow:global_step/s

128
2018-06-02 22:56:47.840240: step 5750, loss = 0.49 (426.5 examples/sec; 0.300 sec/batch)
128
2018-06-02 22:56:50.834276: step 5760, loss = 0.44 (427.5 examples/sec; 0.299 sec/batch)
128
2018-06-02 22:56:53.810278: step 5770, loss = 0.43 (430.0 examples/sec; 0.298 sec/batch)
128
2018-06-02 22:56:56.787348: step 5780, loss = 0.48 (430.1 examples/sec; 0.298 sec/batch)
128
2018-06-02 22:56:59.808237: step 5790, loss = 0.45 (423.6 examples/sec; 0.302 sec/batch)
INFO:tensorflow:global_step/sec: 3.22071
128
2018-06-02 22:57:03.884199: step 5800, loss = 0.44 (314.0 examples/sec; 0.408 sec/batch)
128
2018-06-02 22:57:06.858490: step 5810, loss = 0.32 (430.4 examples/sec; 0.297 sec/batch)
128
2018-06-02 22:57:09.863963: step 5820, loss = 0.39 (425.9 examples/sec; 0.301 sec/batch)
128
2018-06-02 22:57:12.858759: step 5830, loss = 0.43 (427.4 examples/sec; 0.299 sec/batch)
INFO:tensorflow:Saving checkpoints for 5837 into /tmp/cifar10_train\model.ckpt.
128
2018-06-02 22:57:17.929870: step 5840,

128
2018-06-02 23:01:13.497150: step 6590, loss = 0.33 (428.6 examples/sec; 0.299 sec/batch)
INFO:tensorflow:global_step/sec: 3.18898
128
2018-06-02 23:01:17.716966: step 6600, loss = 0.34 (303.3 examples/sec; 0.422 sec/batch)
128
2018-06-02 23:01:20.699959: step 6610, loss = 0.61 (429.1 examples/sec; 0.298 sec/batch)
128
2018-06-02 23:01:23.718886: step 6620, loss = 0.34 (424.0 examples/sec; 0.302 sec/batch)
128
2018-06-02 23:01:26.699930: step 6630, loss = 0.39 (429.4 examples/sec; 0.298 sec/batch)
128
2018-06-02 23:01:29.682938: step 6640, loss = 0.49 (429.1 examples/sec; 0.298 sec/batch)
128
2018-06-02 23:01:32.676930: step 6650, loss = 0.33 (427.5 examples/sec; 0.299 sec/batch)
128
2018-06-02 23:01:35.657960: step 6660, loss = 0.42 (429.4 examples/sec; 0.298 sec/batch)
128
2018-06-02 23:01:38.644971: step 6670, loss = 0.34 (428.5 examples/sec; 0.299 sec/batch)
128
2018-06-02 23:01:41.627995: step 6680, loss = 0.22 (429.1 examples/sec; 0.298 sec/batch)
128
2018-06-02 23:01:44.60403

128
2018-06-02 23:05:38.456981: step 7440, loss = 0.37 (431.5 examples/sec; 0.297 sec/batch)
128
2018-06-02 23:05:41.448332: step 7450, loss = 0.34 (427.8 examples/sec; 0.299 sec/batch)
128
2018-06-02 23:05:44.434120: step 7460, loss = 0.29 (428.7 examples/sec; 0.299 sec/batch)
128
2018-06-02 23:05:47.410451: step 7470, loss = 0.23 (430.1 examples/sec; 0.298 sec/batch)
128
2018-06-02 23:05:50.387492: step 7480, loss = 0.36 (430.0 examples/sec; 0.298 sec/batch)
128
2018-06-02 23:05:53.372174: step 7490, loss = 0.48 (428.9 examples/sec; 0.298 sec/batch)
INFO:tensorflow:global_step/sec: 3.23099
128
2018-06-02 23:05:57.492433: step 7500, loss = 0.41 (311.2 examples/sec; 0.411 sec/batch)
128
2018-06-02 23:06:00.508376: step 7510, loss = 0.27 (423.4 examples/sec; 0.302 sec/batch)
128
2018-06-02 23:06:03.492775: step 7520, loss = 0.41 (428.9 examples/sec; 0.298 sec/batch)
128
2018-06-02 23:06:06.463549: step 7530, loss = 0.27 (430.9 examples/sec; 0.297 sec/batch)
128
2018-06-02 23:06:09.47744

128
2018-06-02 23:10:01.874509: step 8280, loss = 0.36 (426.3 examples/sec; 0.300 sec/batch)
128
2018-06-02 23:10:04.854128: step 8290, loss = 0.33 (429.6 examples/sec; 0.298 sec/batch)
INFO:tensorflow:global_step/sec: 3.22987
128
2018-06-02 23:10:08.877558: step 8300, loss = 0.41 (318.8 examples/sec; 0.402 sec/batch)
128
2018-06-02 23:10:11.866083: step 8310, loss = 0.45 (427.2 examples/sec; 0.300 sec/batch)
128
2018-06-02 23:10:14.845377: step 8320, loss = 0.31 (429.6 examples/sec; 0.298 sec/batch)
128
2018-06-02 23:10:17.824421: step 8330, loss = 0.31 (429.7 examples/sec; 0.298 sec/batch)
128
2018-06-02 23:10:20.813082: step 8340, loss = 0.48 (428.3 examples/sec; 0.299 sec/batch)
128
2018-06-02 23:10:23.801522: step 8350, loss = 0.34 (428.5 examples/sec; 0.299 sec/batch)
128
2018-06-02 23:10:26.829160: step 8360, loss = 0.45 (422.8 examples/sec; 0.303 sec/batch)
128
2018-06-02 23:10:29.848335: step 8370, loss = 0.43 (424.1 examples/sec; 0.302 sec/batch)
128
2018-06-02 23:10:32.88728

128
2018-06-02 23:15:43.200468: step 9130, loss = 0.37 (36.5 examples/sec; 3.512 sec/batch)
128
2018-06-02 23:15:51.603992: step 9140, loss = 0.29 (152.3 examples/sec; 0.840 sec/batch)
128
2018-06-02 23:15:55.398845: step 9150, loss = 0.29 (337.3 examples/sec; 0.379 sec/batch)
128
2018-06-02 23:15:59.253539: step 9160, loss = 0.37 (332.1 examples/sec; 0.385 sec/batch)
128
2018-06-02 23:16:03.543069: step 9170, loss = 0.37 (298.5 examples/sec; 0.429 sec/batch)
128
2018-06-02 23:16:09.206892: step 9180, loss = 0.28 (226.0 examples/sec; 0.566 sec/batch)
128
2018-06-02 23:16:13.211184: step 9190, loss = 0.34 (319.6 examples/sec; 0.401 sec/batch)
INFO:tensorflow:global_step/sec: 0.832063
128
2018-06-02 23:16:19.285966: step 9200, loss = 0.31 (211.1 examples/sec; 0.606 sec/batch)
128
2018-06-02 23:16:23.172544: step 9210, loss = 0.31 (328.6 examples/sec; 0.390 sec/batch)
128
2018-06-02 23:16:27.084084: step 9220, loss = 0.29 (327.2 examples/sec; 0.391 sec/batch)
128
2018-06-02 23:16:30.99562

KeyboardInterrupt: 