Merge pull request tensorflow#47 from tensorflow/internal-to-github-sync

Merge internal changes into public repository (change 181251654)
freedomtan · Jan 9, 2018 · d10a0f2 · d10a0f2
2 parents 1dfee59 + a2a75e0
commit d10a0f2
Show file tree

Hide file tree

Showing 18 changed files with 3,422 additions and 155 deletions.
diff --git a/cloud_tpu/datasets/fake_data_generator.py b/cloud_tpu/datasets/fake_data_generator.py
@@ -0,0 +1,139 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Script to create a fake dataset to test out ResNet-50 and related models.
+
+To run the script setup a virtualenv with the following libraries installed.
+- `gcloud`: Follow the instructions on
+  [cloud SDK docs](https://cloud.google.com/sdk/downloads) followed by
+  installing the python api using `pip install google-cloud`.
+- `tensorflow`: Install with `pip install tensorflow`
+- `Pillow`: Install with `pip install pillow`
+
+You can run the script using the following command.
+```
+python fake_data_generator.py \
+  --project="TEST_PROJECT" \
+  --gcs_output_path="gs://TEST_BUCKET/DATA_DIR"
+```
+"""
+
+import os
+import StringIO
+import numpy as np
+from PIL import Image
+import tensorflow as tf
+
+tf.flags.DEFINE_string('project', None,
+                       'Google cloud project id for uploading the dataset.')
+tf.flags.DEFINE_string('gcs_output_path', None,
+                       'GCS path for uploading the dataset.')
+tf.flags.DEFINE_integer('examples_per_shard', 5000, '')
+tf.flags.DEFINE_integer('num_label_classes', 1000, '')
+tf.flags.DEFINE_integer('training_shards', 260, '')
+tf.flags.DEFINE_integer('validation_shards', 10, '')
+
+FLAGS = tf.flags.FLAGS
+
+TRAINING_PREFIX = 'train'
+VALIDATION_PREFIX = 'validation'
+
+
+def _int64_feature(value):
+  """Wrapper for inserting int64 features into Example proto."""
+  if not isinstance(value, list):
+    value = [value]
+  return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
+
+
+def _bytes_feature(value):
+  """Wrapper for inserting bytes features into Example proto."""
+  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+
+
+def _create_example(label):
+  """Build an Example proto for a single randomly generated image."""
+  colorspace = 'RGB'
+  channels = 3
+  image_format = 'JPEG'
+  height = 224
+  width = 224
+
+  # Create a random image
+  image = (np.random.rand(height, width, channels) * 255).astype('uint8')
+  image = Image.fromarray(image)
+  image_buffer = StringIO.StringIO()
+  image.save(image_buffer, format=image_format)
+
+  example = tf.train.Example(
+      features=tf.train.Features(
+          feature={
+              'image/height': _int64_feature(height),
+              'image/width': _int64_feature(width),
+              'image/colorspace': _bytes_feature(colorspace),
+              'image/channels': _int64_feature(channels),
+              'image/class/label': _int64_feature(label),
+              'image/format': _bytes_feature(image_format),
+              'image/encoded': _bytes_feature(image_buffer.getvalue())
+          }))
+  return example
+
+
+def _create_random_file(output_file):
+  """Create a single tf-record file with multiple examples for each class."""
+  writer = tf.python_io.TFRecordWriter(output_file)
+  examples_per_class = int(FLAGS.examples_per_shard / FLAGS.num_label_classes)
+
+  assert examples_per_class > 0, 'Number of examples per class should be >= 1'
+
+  for label in range(FLAGS.num_label_classes):
+    for _ in range(examples_per_class):
+      example = _create_example(label)
+      writer.write(example.SerializeToString())
+  writer.close()
+
+
+def create_tf_records(data_dir):
+  """Create random data and write it to tf-record files."""
+  def _create_records(prefix, num_shards):
+    """Create records in a given directory."""
+    for shard in range(num_shards):
+      filename = os.path.join(data_dir, '%s-%.5d-of-%.5d' % (prefix, shard,
+                                                             num_shards))
+      _create_random_file(filename)
+
+  tf.logging.info('Processing the training data.')
+  _create_records(TRAINING_PREFIX, FLAGS.training_shards)
+
+  tf.logging.info('Processing the validation data.')
+  _create_records(VALIDATION_PREFIX, FLAGS.validation_shards)
+
+
+def main(argv):  # pylint: disable=unused-argument
+  tf.logging.set_verbosity(tf.logging.INFO)
+
+  if FLAGS.project is None:
+    raise ValueError('GCS Project must be provided.')
+
+  if FLAGS.gcs_output_path is None:
+    raise ValueError('GCS output path must be provided.')
+  elif not FLAGS.gcs_output_path.startswith('gs://'):
+    raise ValueError('GCS output path must start with gs://')
+
+  # Create fake tf-records
+  create_tf_records(FLAGS.gcs_output_path)
+
+
+if __name__ == '__main__':
+  tf.app.run()
diff --git a/cloud_tpu/datasets/imagenet_to_gcs.py b/cloud_tpu/datasets/imagenet_to_gcs.py
@@ -410,22 +410,21 @@ def upload_to_gcs(training_records, validation_records):
   client = storage.Client(project=FLAGS.project)
   bucket = client.get_bucket(bucket_name)
 
-  def _upload_files(filenames, subdirectory):
+  def _upload_files(filenames):
     """Upload a list of files into a specifc subdirectory."""
     for i, filename in enumerate(sorted(filenames)):
-      blob = bucket.blob(key_prefix + subdirectory + '/' +
-                         os.path.basename(filename))
+      blob = bucket.blob(key_prefix + os.path.basename(filename))
       blob.upload_from_filename(filename)
       if not i % 20:
         tf.logging.info('Finished uploading file: %s' % filename)
 
   # Upload training dataset
   tf.logging.info('Uploading the training data.')
-  _upload_files(training_records, TRAINING_DIRECTORY)
+  _upload_files(training_records)
 
   # Upload validation dataset
   tf.logging.info('Uploading the validation data.')
-  _upload_files(validation_records, VALIDATION_DIRECTORY)
+  _upload_files(validation_records)
 
 
 def main(argv):  # pylint: disable=unused-argument

diff --git a/cloud_tpu/jupyterhub/launch.sh b/cloud_tpu/jupyterhub/launch.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# This script tries to launch the Jupyterhub notebook in this directory if it
+# is running in a GCE instance.
+#
+# 1. Verifies that Jupyterhub, TensorFlow, and gcloud are installed properly
+# 2. Tags the current instance with `cloud-tpu-demo-notebook`.
+# 3. Creates a firewall rule that opens port 8888 (for Jupyterhub) and port
+#    6006 (for TensorBoard) for all instances tagged `cloud-tpu-demo-notebook`.
+# 4. Starts Jupyterhub.
+
+version_lte() {
+    [  "$1" = "`echo -e "$1\n$2" | sort -V | head -n1`" ]
+}
+
+# Ensure that pip is installed
+command -v pip >/dev/null 2>&1 || { echo "To run this tutorial, we need pip to be installed. You can install pip by running `sudo apt-get install python-pip`."; exit 1; }
+
+# Ensure that gcloud is installed
+command -v gcloud >/dev/null 2>&1 || { echo "To run this tutorial, we need the Google Cloud SDK. Please see https://cloud.google.com/sdk/downloads for instructions."; exit 1; }
+
+# Ensure that TensorFlow is installed
+TF_VERSION=`python -c "import tensorflow; print tensorflow.__version__" 2>/dev/null`
+version_lte $TF_VERSION 1.5 && (echo "Your version of TensorFlow is too low. You must install at least version 1.5.0."; exit 1;)
+
+# Ensure that Jupyter is installed
+command -v jupyter >/dev/null 2>&1 || { sudo pip install jupyter; }
+
+# Retrieve the instance name and zone of the current instance
+INSTANCE_NAME=`curl -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/instance/name 2>/dev/null`
+INSTANCE_ZONE=`curl -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/instance/zone 2>/dev/null`
+
+# Add `cloud-tpu-demo-notebook` tag to current instance
+gcloud compute instances add-tags $INSTANCE_NAME --tags cloud-tpu-demo-notebook --zone $INSTANCE_ZONE
+
+# Add firewall rule to open tcp:6006,8888 for `cloud-tpu-demo-notebook`
+gcloud compute firewall-rules create cloud-tpu-demo-notebook --target-tags=cloud-tpu-demo-notebook --allow=tcp:6006,tcp:8888
+
+# Print out JupyterHub URL
+echo
+echo The Jupyterhub is at: http://`curl -H "Metadata-Flavor: Google" http://metadata/computeMetadata/v1/instance/network-interfaces/0/access-configs/0/external-ip 2> /dev/null`:8888/
+echo
+
+# Launch JupyterHub
+jupyter notebook --no-browser --ip=0.0.0.0
diff --git a/cloud_tpu/models/cifar_keras/cifar_keras.py b/cloud_tpu/models/cifar_keras/cifar_keras.py
@@ -0,0 +1,146 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Cifar example using Keras for model definition."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from tensorflow.contrib.tpu.python.tpu import tpu_config
+from tensorflow.contrib.tpu.python.tpu import tpu_estimator
+from tensorflow.contrib.tpu.python.tpu import tpu_optimizer
+
+tf.flags.DEFINE_integer("batch_size", 128,
+                        "Mini-batch size for the computation. Note that this "
+                        "is the global batch size and not the per-shard batch.")
+tf.flags.DEFINE_float("learning_rate", 0.05, "Learning rate.")
+tf.flags.DEFINE_string("train_file", "", "Path to cifar10 training data.")
+tf.flags.DEFINE_integer("train_steps", 100000,
+                        "Total number of steps. Note that the actual number of "
+                        "steps is the next multiple of --iterations greater "
+                        "than this value.")
+tf.flags.DEFINE_bool("use_tpu", True, "Use TPUs rather than plain CPUs")
+tf.flags.DEFINE_string("master", "",
+                       "BNS name of the TensorFlow master to use.")
+tf.flags.DEFINE_string("model_dir", None, "Estimator model_dir")
+tf.flags.DEFINE_integer("iterations", 100,
+                        "Number of iterations per TPU training loop.")
+tf.flags.DEFINE_integer("num_shards", 8, "Number of shards (TPU chips).")
+
+
+FLAGS = tf.flags.FLAGS
+
+
+def model_fn(features, labels, mode, params):
+  """Define a CIFAR model in Keras."""
+  del params  # unused
+  layers = tf.contrib.keras.layers
+
+  # Pass our input tensor to initialize the Keras input layer.
+  v = layers.Input(tensor=features)
+  v = layers.Conv2D(filters=32, kernel_size=5,
+                    activation="relu", padding="same")(v)
+  v = layers.MaxPool2D(pool_size=2)(v)
+  v = layers.Conv2D(filters=64, kernel_size=5,
+                    activation="relu", padding="same")(v)
+  v = layers.MaxPool2D(pool_size=2)(v)
+  v = layers.Flatten()(v)
+  fc1 = layers.Dense(units=512, activation="relu")(v)
+  logits = layers.Dense(units=10)(fc1)
+
+  # Instead of constructing a Keras model for training, build our loss function
+  # and optimizer in Tensorflow.
+  #
+  # N.B.  This construction omits some features that are important for more
+  # complex models (e.g. regularization, batch-norm).  Once
+  # `model_to_estimator` support is added for TPUs, it should be used instead.
+  loss = tf.reduce_mean(
+      tf.nn.sparse_softmax_cross_entropy_with_logits(
+          logits=logits, labels=labels
+      )
+  )
+  optimizer = tf.train.AdamOptimizer()
+  if FLAGS.use_tpu:
+    optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)
+
+  train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
+
+  return tpu_estimator.TPUEstimatorSpec(
+      mode=mode,
+      loss=loss,
+      train_op=train_op,
+      predictions={
+          "classes": tf.argmax(input=logits, axis=1),
+          "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
+      }
+  )
+
+
+def input_fn(params):
+  """Read CIFAR input data from a TFRecord dataset."""
+  del params
+  batch_size = FLAGS.batch_size
+  def parser(serialized_example):
+    """Parses a single tf.Example into image and label tensors."""
+    features = tf.parse_single_example(
+        serialized_example,
+        features={
+            "image": tf.FixedLenFeature([], tf.string),
+            "label": tf.FixedLenFeature([], tf.int64),
+        })
+    image = tf.decode_raw(features["image"], tf.uint8)
+    image.set_shape([3*32*32])
+    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
+    image = tf.transpose(tf.reshape(image, [3, 32, 32]))
+    label = tf.cast(features["label"], tf.int32)
+    return image, label
+
+  dataset = tf.data.TFRecordDataset([FLAGS.train_file])
+  dataset = dataset.map(parser, num_parallel_calls=batch_size)
+  dataset = dataset.prefetch(4 * batch_size).cache().repeat()
+  dataset = dataset.apply(
+      tf.contrib.data.batch_and_drop_remainder(FLAGS.batch_size)
+  )
+  dataset = dataset.prefetch(1)
+  images, labels = dataset.make_one_shot_iterator().get_next()
+  return images, labels
+
+
+def main(argv):
+  del argv  # Unused.
+
+  run_config = tpu_config.RunConfig(
+      master=FLAGS.master,
+      model_dir=FLAGS.model_dir,
+      save_checkpoints_secs=3600,
+      session_config=tf.ConfigProto(
+          allow_soft_placement=True, log_device_placement=True),
+      tpu_config=tpu_config.TPUConfig(
+          iterations_per_loop=FLAGS.iterations, num_shards=FLAGS.num_shards),
+  )
+
+  estimator = tpu_estimator.TPUEstimator(
+      model_fn=model_fn,
+      use_tpu=FLAGS.use_tpu,
+      config=run_config,
+      train_batch_size=FLAGS.batch_size)
+  estimator.train(input_fn=input_fn, max_steps=FLAGS.train_steps)
+
+
+if __name__ == "__main__":
+  tf.logging.set_verbosity(tf.logging.INFO)
+  tf.app.run(main)