Create TensorFlow DNN model

In [1]:
# declare bucket, project, region variables
# These values are exported as environment variables in a later cell; BUCKET and
# REGION are then read by the %%bash cell that creates the bucket.
# NOTE(review): BUCKET and PROJECT hold the same string here -- confirm that is intended.
BUCKET = 'crudy-end-to-end-ml-tf-gcp'
PROJECT = 'crudy-end-to-end-ml-tf-gcp'
REGION = 'us-west1'

In [3]:
import os
# Export the notebook-level settings so that %%bash cells can read them.
os.environ.update({
    'BUCKET': BUCKET,
    'PROJECT': PROJECT,
    'REGION': REGION,
})

In [4]:
%%bash
# Create the bucket only when it does not already appear in the `gsutil ls` listing.
# -x -F: match the whole line as a fixed string, so dots in a bucket name are not
# interpreted as regex wildcards; expansions are quoted to avoid word splitting.
if ! gsutil ls | grep -qxF "gs://${BUCKET}/"; then
  gsutil mb -l "${REGION}" "gs://${BUCKET}"
fi

Create TensorFlow model using TensorFlow's Estimator API

First, write an input_fn to read the data

In [5]:
import shutil
import numpy as np
import tensorflow as tf
print(tf.__version__)

  from ._conv import register_converters as _register_converters


1.8.0


In [6]:
# Determine CSV, label, and key columns
# CSV_COLUMNS is a list of column names, in the order the fields appear in each
# CSV row. It must be a list (not a comma-separated string) because it is zipped
# with the decoded columns; zipping a string would pair columns with characters.
CSV_COLUMNS = 'weight_pounds,is_male,mother_age,plurality,gestation_weeks,key'.split(',')
LABEL_COLUMN = 'weight_pounds'
KEY_COLUMN = 'key'

# Set default values for each CSV column (one entry per name in CSV_COLUMNS;
# the type of each default also fixes the type the field is parsed as)
DEFAULTS = [[0.0], ['null'], [0.0], ['null'], [0.0], ['nokey']]
TRAIN_STEPS = 1000

In [7]:
# Create an input function reading a file using the Dataset API
# then provide the results to the Estimator API
def read_dataset(filename, mode, batch_size=512):
  """Build an Estimator input function that reads CSV rows from `filename`.

  Args:
    filename: file path or glob pattern; expanded with tf.gfile.Glob.
    mode: a tf.estimator.ModeKeys value. TRAIN shuffles the rows and repeats
      indefinitely; any other mode makes exactly one pass over the data.
    batch_size: number of rows per emitted batch.

  Returns:
    A zero-argument function that returns a tf.data.Dataset yielding
    (features, label) batches, where features is a dict keyed by column name
    with the LABEL_COLUMN entry removed.
  """
  def _input_fn():
    # Accept CSV_COLUMNS either as a list of names or as one comma-separated
    # string; zipping a raw string would key the features by single characters.
    if isinstance(CSV_COLUMNS, str):
      column_names = CSV_COLUMNS.split(',')
    else:
      column_names = list(CSV_COLUMNS)

    def decode_csv(value_column):
      # DEFAULTS supplies both the fallback value and the dtype of every field
      columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
      features = dict(zip(column_names, columns))
      label = features.pop(LABEL_COLUMN)
      return features, label

    # Create list of files that match pattern
    file_list = tf.gfile.Glob(filename)

    # Create dataset from file list
    dataset = (tf.data.TextLineDataset(file_list) # Read text file
               .map(decode_csv)) # Transform each element by applying decode_csv function

    if mode == tf.estimator.ModeKeys.TRAIN:
      num_epochs = None # indefinitely
      dataset = dataset.shuffle(buffer_size=10*batch_size)
    else:
      num_epochs = 1 # end-of-input after this

    dataset = dataset.repeat(num_epochs).batch(batch_size)
    return dataset
  return _input_fn
    
    

Next, define the feature columns

In [8]:
# Define feature columns
def get_categorical(name, values):
  """Return an indicator column for feature `name` over the vocabulary `values`."""
  vocabulary_column = tf.feature_column.categorical_column_with_vocabulary_list(name, values)
  return tf.feature_column.indicator_column(vocabulary_column)

def get_cols():
  """Return the list of feature columns fed to the model.

  Categorical inputs (is_male, plurality) become indicator columns via
  get_categorical; mother_age and gestation_weeks are numeric columns.
  """
  # Define column types
  # NOTE(review): vocabulary strings must match the values in the CSV exactly;
  # 'Multiple(2+)' replaces the misspelled 'Multipe(2+)' -- confirm against the data.
  return [
          get_categorical('is_male', ['True', 'False', 'Unknown']),
          tf.feature_column.numeric_column('mother_age'),
          get_categorical('plurality', ['Single(1)', 'Twins(2)', 'Triplets(3)', 'Quadruplets(4)', 'Quintuplets(5)', 'Multiple(2+)']),
          tf.feature_column.numeric_column('gestation_weeks')
         ]

To predict with the TensorFlow model, we also need a serving input function. It should accept all of the model's input features from the user at prediction time.

In [None]:
# Create serving input function to be able to serve predictions later using provided inputs


In [None]:
# Create estimator to train and evaluate
def train_and_evaluate(output_dir):
  """Set up a DNNRegressor, its training spec and an exporter under `output_dir`.

  Args:
    output_dir: directory used as the estimator's model_dir.

  NOTE(review): only the estimator, TrainSpec and exporter are built in the
  statements below; no EvalSpec or tf.estimator.train_and_evaluate call is
  visible here. `serving_input_fn` must be defined before this function is called.
  """
  EVAL_INTERVAL = 300 # seconds between checkpoint saves
  run_config = tf.estimator.RunConfig(save_checkpoints_secs = EVAL_INTERVAL, keep_checkpoint_max = 3)
  # feature_columns needs the list of columns, so get_cols must be called here
  estimator = tf.estimator.DNNRegressor(model_dir = output_dir, feature_columns = get_cols(), hidden_units = [64, 32], config = run_config)
  train_spec = tf.estimator.TrainSpec(input_fn = read_dataset('train.csv', mode = tf.estimator.ModeKeys.TRAIN), max_steps = TRAIN_STEPS)
  exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
  
  
                                        