In [0]:
%%bash
# Install the packages that model.py imports but the base image may not ship.
pip install tensorflow-transform
# tensorflow_hub is imported by model.py, so install it alongside the others.
pip install tensorflow-hub
# Quote the extras specifier so the shell cannot treat [gcp] as a glob pattern.
pip install 'apache-beam[gcp]'

In [0]:
%%writefile model.py
from __future__ import print_function, division, absolute_import # python 2 compatibility
import tensorflow as tf
from tensorflow_transform.saved import input_fn_maker, saved_transform_io
from tensorflow_transform.tf_metadata import metadata_io
import tensorflow_hub as hub
import apache_beam as beam
import shutil
import os
# Print the TensorFlow version whenever this module is loaded.
print(tf.__version__)
# Set TensorFlow's logging verbosity to INFO.
tf.logging.set_verbosity(tf.logging.INFO)

In [0]:
# Cloud settings; the Cloud Setup cell copies these into environment variables.
REGION = 'asia-east1'
# NOTE(review): '{BUCKET}' and '{PROJECT}' look like template placeholders --
# presumably they must be replaced with real values before running; confirm.
BUCKET = '{BUCKET}'
PROJECT = '{PROJECT}'

# Cloud Setup
This section is required only when running on Google Cloud (ML Engine).

In [0]:
# `os` is otherwise only imported inside the %%writefile'd model.py, which is
# written to disk but never executed in this kernel, so import it here.
import os

# Export the settings so the shell cells can read them as $PROJECT, $BUCKET
# and $REGION.
os.environ['PROJECT'] = PROJECT
os.environ['BUCKET'] = BUCKET
os.environ['REGION'] = REGION
# NOTE(review): presumably the TensorFlow runtime version for ML Engine jobs;
# it is not consumed by any cell visible here -- confirm.
os.environ['TFVERSION'] = '1.9'

In [0]:
%%bash
# Point the gcloud CLI at the project and region exported by the previous cell.
gcloud config set project "$PROJECT"
gcloud config set compute/region "$REGION"

# Import Data
Data is assumed to be in the `TFRecords` format with GZIP compression, which gives better performance and scalability than CSV files. The conversion from CSV to `TFRecords` should already have been done in the previous notebook, `02-tf_transform.ipynb`.

# Set up Model as a Package
We need to set up our model as a package for training and serving.

- `model.py` provides the code for data inputs and the model itself
- `setup.py` provides metadata about the package
- `task.py` sets up the package to be used from the command line, with arguments that specify hyperparameters to the model as well as GCP resources 

In [0]:
%%writefile model.py --append


# Column names of the dataset: the label first, then the message text.
CSV_COLUMNS = ['spam', 'text']
# Name of the column holding the label.
LABEL_COLUMN = 'spam'
# One single-element list per entry in CSV_COLUMNS.
# NOTE(review): presumably per-column default values used when decoding CSV
# records -- confirm against the input function.
DEFAULTS = [['spam'], ["FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, 1.50 to rcv"]]
# A single string placeholder named 'text'. It is created in the default graph
# as soon as this module is loaded.
INPUT_COLUMNS = [
    tf.placeholder(tf.string, name='text')
]


# TF-Hub text-embedding modules selectable through `embedding_type`.
_EMBEDDING_MODULE_URLS = {
    'nnlm': 'https://tfhub.dev/google/nnlm-en-dim128/1',
    'universal-sentence-encoder': 'https://tfhub.dev/google/universal-sentence-encoder/2',
    'elmo': 'https://tfhub.dev/google/elmo/2',
}

# Values accepted for `model_type`.
_MODEL_TYPES = ('linear', 'dnn')


def build_estimator(model_dir, model_type, embedding_type, learning_rate,
                    hidden_units, dropout,
                    l1_regularization_strength, l2_regularization_strength):
    """Build a binary text classifier on top of a frozen TF-Hub embedding.

    All arguments are validated before any TensorFlow object is created, so a
    bad configuration fails fast without loading a hub module.

    Args:
        model_dir: Directory passed to the estimator as `model_dir`.
        model_type: 'linear' for a LinearClassifier or 'dnn' for a
            DNNClassifier.
        embedding_type: One of 'nnlm', 'universal-sentence-encoder' or 'elmo'.
        learning_rate: Learning rate of the optimizer (FTRL for 'linear',
            Adam for 'dnn').
        hidden_units: Passed to DNNClassifier as `hidden_units`; unused for
            'linear'.
        dropout: Passed to DNNClassifier as `dropout`; unused for 'linear'.
        l1_regularization_strength: L1 strength of the FTRL optimizer; unused
            for 'dnn'.
        l2_regularization_strength: L2 strength of the FTRL optimizer; unused
            for 'dnn'.

    Returns:
        A tf.estimator.LinearClassifier or tf.estimator.DNNClassifier with
        two classes.

    Raises:
        ValueError: If `embedding_type` or `model_type` is not supported.
    """
    if embedding_type is None:
        # The embedding is the only feature column, so there is nothing to
        # train on without one.
        raise ValueError(
            'embedding_type=None is not supported: the classifier needs a '
            'text embedding as its feature column; choose one of "nnlm", '
            '"universal-sentence-encoder", "elmo"')
    if embedding_type not in _EMBEDDING_MODULE_URLS:
        raise ValueError('Embedding type must be one of "nnlm", "universal-sentence-encoder", "elmo"')
    if model_type not in _MODEL_TYPES:
        raise ValueError('Model type must be one of "linear" or "dnn"')

    # Canned estimators require FeatureColumn objects, not tensors, and the
    # hub module has to be instantiated inside the estimator's own graph.
    # hub.text_embedding_column takes care of both.
    # NOTE(review): assumes the input function supplies the message under the
    # 'text' feature key (it matches CSV_COLUMNS) -- confirm.
    text_embedding = hub.text_embedding_column(
        key='text',
        module_spec=_EMBEDDING_MODULE_URLS[embedding_type],
        trainable=False
    )

    if model_type == 'linear':
        return tf.estimator.LinearClassifier(
            feature_columns=[text_embedding],
            n_classes=2,
            model_dir=model_dir,
            optimizer=tf.train.FtrlOptimizer(
                learning_rate=learning_rate,
                l1_regularization_strength=l1_regularization_strength,
                l2_regularization_strength=l2_regularization_strength
            )
        )

    # model_type == 'dnn' (guaranteed by the validation above).
    return tf.estimator.DNNClassifier(
        feature_columns=[text_embedding],
        hidden_units=hidden_units,
        n_classes=2,
        model_dir=model_dir,
        optimizer=tf.train.AdamOptimizer(
            learning_rate=learning_rate,
        ),
        dropout=dropout
    )