## Challenge Exercise

Create a neural network that is capable of finding the volume of a cylinder given the radius of its base (r) and its height (h). Assume that the radius and height of the cylinder are both in the range 0.5 to 2.0. Unlike in the challenge exercise for b_estimator.ipynb, assume that your measurements of r, h and V are all rounded off to the nearest 0.1. Simulate the necessary training dataset. This time, you will need a lot more data to get a good predictor.
<p>
Now modify the "noise" so that instead of just rounding off the value, there is up to a 10% error (uniformly distributed) in the measurement followed by rounding off.

In [None]:
from google.datalab.ml import TensorBoard
from math import pi
import numpy as np
import pandas as pd
import shutil
import tensorflow as tf

# Print the version of the imported TensorFlow package.
print(tf.__version__)

## Generate data

In [None]:
def generate_cylinders(filename, size, seed=None):
    """Simulate noisy cylinder measurements and write them to a CSV file.

    Radius and height are drawn uniformly from the range 0.5 to 2.0 and the
    true volume is (pi * r^2) * h. The volume is then multiplied by a
    uniformly distributed factor between 0.9 and 1.1 (up to a 10% error),
    after which radius, height and volume are rounded to the nearest 0.1.

    The file is written without a header row or index column, and its
    columns are always in the order volume, radius, height.

    Args:
      filename: path of the CSV file to write.
      size: number of cylinders (rows) to generate.
      seed: optional seed for a private random generator, which makes the
        generated data reproducible. When None (the default), NumPy's
        global random state is used, exactly as before.
    """
    # Fixed on-disk column order. Without it, older pandas/Python versions
    # sort dict keys alphabetically (height, radius, volume), which would
    # silently swap the label column for anything reading by position.
    column_order = ['volume', 'radius', 'height']

    # Default to the global NumPy state so existing callers (and any
    # np.random.seed() they rely on) keep their behavior.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Generate radiuses and heights
    radius = rng.uniform(0.5, 2.0, size=size)
    height = rng.uniform(0.5, 2.0, size=size)

    # Calculate the correct volumes with those radiuses and heights
    volume = (pi * radius ** 2) * height

    # Add the measurement error to the volumes (before any rounding)
    volume = volume * rng.uniform(0.9, 1.1, size=size)

    # Then round off radius, height and volume
    radius = np.round(radius, decimals=1)
    height = np.round(height, decimals=1)
    volume = np.round(volume, decimals=1)

    df = pd.DataFrame(
        {'volume': volume, 'radius': radius, 'height': height},
        columns=column_order,
    )

    df.to_csv(filename, header=False, index=False)
    print('wrote {} cylinders to {}'.format(size, filename))

# Total number of simulated cylinders across all three splits.
total_size = 10 ** 7

# Write the train / validation / test files (80% / 10% / 10%), in that order.
for split_file, split_fraction in [
    ('cylinders-train.csv', 0.8),
    ('cylinders-valid.csv', 0.1),
    ('cylinders-test.csv', 0.1),
]:
    generate_cylinders(split_file, int(split_fraction * total_size))

## Input

In [None]:
# Names of the CSV columns, in the order they appear in each row.
CSV_COLUMNS = ['volume', 'radius', 'height']

# The label is the first CSV column.
LABEL_COLUMN = CSV_COLUMNS[0]

# One single-element list per CSV column, in CSV_COLUMNS order; handed to
# tf.decode_csv as record_defaults.
DEFAULTS = [
    [25.1],
    [2.0],
    [2.0],
]

def read_dataset(filename, mode, batch_size = 1024):
    """Build a batched tf.data dataset of (features, label) pairs from CSV.

    Args:
      filename: file name or pattern handed to tf.data.Dataset.list_files.
      mode: a tf.estimator.ModeKeys value. In TRAIN mode the rows are
        shuffled and repeated indefinitely; in any other mode the data is
        read exactly once.
      batch_size: number of rows per batch.

    Returns:
      A dataset whose elements are (features, label) tuples, where features
      is a dict keyed by the non-label names in CSV_COLUMNS and label is the
      LABEL_COLUMN value.
    """
    def parse_line(csv_line):
        """Split one CSV text line into (features dict, label)."""
        fields = tf.decode_csv(csv_line, record_defaults = DEFAULTS)
        features = {name: field for name, field in zip(CSV_COLUMNS, fields)}
        label = features.pop(LABEL_COLUMN)
        return features, label

    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # flat_map is the one-to-many step (file name -> its text lines);
    # map is the one-to-one step (text line -> parsed features and label).
    dataset = (
        tf.data.Dataset.list_files(filename)
        .flat_map(tf.data.TextLineDataset)
        .map(parse_line)
    )

    if is_training:
        dataset = dataset.shuffle(buffer_size = 10 * batch_size)

    # None repeats indefinitely; 1 signals end-of-input after a single pass.
    num_epochs = None if is_training else 1

    return dataset.repeat(num_epochs).batch(batch_size)

## Create features out of input data
For now, pass the inputs through unchanged (same as in the previous lab).

In [None]:
# One numeric feature column per model input: radius and height.
INPUT_COLUMNS = [
    tf.feature_column.numeric_column(input_name)
    for input_name in ('radius', 'height')
]

def add_more_features(feats):
    """Return the feature columns to use for the model.

    This is a hook for feature engineering. No extra features are defined
    yet, so the given `feats` object is handed back unchanged.
    """
    return feats

# Feature columns handed to the estimator: INPUT_COLUMNS after the
# add_more_features hook has been applied.
feature_cols = add_more_features(INPUT_COLUMNS)

## Serving input function

In [None]:
def serving_input_fn():
    """Describe the inputs the exported model accepts at serving time.

    Creates one float32 placeholder of shape [None] for each of 'radius'
    and 'height', i.e. the fields of the JSON feed the model will receive
    once deployed behind a REST API.

    Returns:
      A tf.estimator.export.ServingInputReceiver whose features and
      receiver tensors are the same placeholder dict.
    """
    placeholders = {
        field: tf.placeholder(tf.float32, [None])
        for field in ('radius', 'height')
    }

    # No transformation is needed between the incoming format and what the
    # model expects, so the placeholders double as the model features.
    return tf.estimator.export.ServingInputReceiver(placeholders, placeholders)

## tf.estimator.train_and_evaluate

In [None]:
def train_and_evaluate(output_dir, num_train_steps):
    """Train a DNN regressor on the cylinder data, evaluating as it goes.

    Args:
      output_dir: directory used as the estimator's model_dir.
      num_train_steps: value passed as max_steps to the training spec.
    """
    def train_input_fn():
        return read_dataset('./cylinders-train.csv', mode = tf.estimator.ModeKeys.TRAIN)

    def eval_input_fn():
        return read_dataset('./cylinders-valid.csv', mode = tf.estimator.ModeKeys.EVAL)

    estimator = tf.estimator.DNNRegressor(
        hidden_units = [128, 64, 32, 16],
        model_dir = output_dir,
        feature_columns = feature_cols,
    )

    train_spec = tf.estimator.TrainSpec(
        input_fn = train_input_fn,
        max_steps = num_train_steps,
    )

    eval_spec = tf.estimator.EvalSpec(
        input_fn = eval_input_fn,
        steps = None,
        start_delay_secs = 1,  # start evaluating after N seconds
        throttle_secs = 10,  # evaluate every N seconds
        exporters = tf.estimator.LatestExporter('exporter', serving_input_fn),
    )

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

## Monitoring with TensorBoard
Use "refresh" in TensorBoard during training to see progress.

In [None]:
# Directory passed to train_and_evaluate as the model output directory;
# TensorBoard is started against the same directory.
OUTDIR = './cylinders_trained'
TensorBoard().start(OUTDIR)

## Run training

In [None]:
# Run training: delete any previous output, then train for 1024 steps.
shutil.rmtree(OUTDIR, ignore_errors = True)  # start fresh each time; a missing directory is not an error
tf.summary.FileWriterCache.clear()  # ensure filewriter cache is clear for TensorBoard events file
train_and_evaluate(OUTDIR, num_train_steps = 1024)

### You can now shut TensorBoard down

In [None]:
# Stop every TensorBoard instance reported by TensorBoard.list().
pids_df = TensorBoard.list()
if not pids_df.empty:  # nothing to stop when no instance is listed
    for pid in pids_df['pid']:
        TensorBoard().stop(pid)
        print('Stopped TensorBoard with pid {}'.format(pid))

Copyright 2017 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License