# Download data from GCS

In [3]:
from google.cloud import storage
import pandas as pd
from io import StringIO

storage_client = storage.Client()
bucket = storage_client.get_bucket('caip_notebooks_demo_temp')
blob = storage.Blob("train_df.csv", bucket)
train_df = pd.read_csv(StringIO(str(blob.download_as_string(),'utf-8')))
blob = storage.Blob("test_df.csv", bucket)
test_df = pd.read_csv(StringIO(str(blob.download_as_string(),'utf-8')))

## Model
### Input functions

[Estimator framework](https://www.tensorflow.org/get_started/premade_estimators#overview_of_programming_with_estimators) provides [input functions](https://www.tensorflow.org/api_docs/python/tf/estimator/inputs/pandas_input_fn) that wrap Pandas dataframes.

In [4]:
from absl import logging

import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
# Training input on the whole training set with no limit on training epochs.
train_input_fn = tf.estimator.inputs.pandas_input_fn(
    train_df, train_df["polarity"], num_epochs=None, shuffle=True)

# Prediction on the whole training set.
predict_train_input_fn = tf.estimator.inputs.pandas_input_fn(
    train_df, train_df["polarity"], shuffle=False)
# Prediction on the test set.
predict_test_input_fn = tf.estimator.inputs.pandas_input_fn(
    test_df, test_df["polarity"], shuffle=False)

### Feature columns

TF-Hub provides a [feature column](https://github.com/tensorflow/hub/blob/master/docs/api_docs/python/hub/text_embedding_column.md) that applies a module on the given text feature and passes further the outputs of the module. In this tutorial we will be using the [nnlm-en-dim128 module](https://tfhub.dev/google/nnlm-en-dim128/1). For the purpose of this tutorial, the most important facts are:

* The module takes **a batch of sentences in a 1-D tensor of strings** as input.
* The module is responsible for **preprocessing of sentences** (e.g. removal of punctuation and splitting on spaces).
* The module works with any input (e.g. **nnlm-en-dim128** hashes words not present in vocabulary into ~20.000 buckets).

In [5]:
embedded_text_feature_column = hub.text_embedding_column(
    key="sentence", 
    module_spec="https://tfhub.dev/google/nnlm-en-dim128/1")

### Estimator

For classification we can use a [DNN Classifier](https://www.tensorflow.org/api_docs/python/tf/estimator/DNNClassifier) (note further remarks about different modelling of the label function at the end of the tutorial).

In [6]:
estimator = tf.estimator.DNNClassifier(
    hidden_units=[500, 100],
    feature_columns=[embedded_text_feature_column],
    n_classes=2,
    optimizer=tf.train.AdagradOptimizer(learning_rate=0.003))

INFO:tensorflow:Using default config.


INFO:tensorflow:Using default config.






INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_tf_random_seed': None, '_save_checkpoints_secs': 600, '_protocol': None, '_task_id': 0, '_log_step_count_steps': 100, '_is_chief': True, '_task_type': 'worker', '_master': '', '_save_summary_steps': 100, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fa0b2772320>, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_num_ps_replicas': 0, '_keep_checkpoint_every_n_hours': 10000, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_num_worker_replicas': 1, '_global_id_in_cluster': 0, '_train_distribute': None, '_evaluation_master': '', '_eval_distribute': None, '_model_dir': '/tmp/tmpnsd5chh0', '_service': None, '_save_checkpoints_steps': None, '_session_creation_timeout_secs': 7200, '_device_fn': None}


INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_tf_random_seed': None, '_save_checkpoints_secs': 600, '_protocol': None, '_task_id': 0, '_log_step_count_steps': 100, '_is_chief': True, '_task_type': 'worker', '_master': '', '_save_summary_steps': 100, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fa0b2772320>, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_num_ps_replicas': 0, '_keep_checkpoint_every_n_hours': 10000, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_num_worker_replicas': 1, '_global_id_in_cluster': 0, '_train_distribute': None, '_evaluation_master': '', '_eval_distribute': None, '_model_dir': '/tmp/tmpnsd5chh0', '_service': None, '_save_checkpoints_steps': None, '_session_creation_timeout_secs': 7200, '_device_fn': None}


### Training

Train the estimator for a reasonable amount of steps.

In [7]:
# Training for 1,000 steps means 128,000 training examples with the default
# batch size. This is roughly equivalent to 5 epochs since the training dataset
# contains 25,000 examples.
estimator.train(input_fn=train_input_fn, steps=2);

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


Instructions for updating:
To construct input pipelines, use the `tf.data` module.


Instructions for updating:
To construct input pipelines, use the `tf.data` module.


Instructions for updating:
To construct input pipelines, use the `tf.data` module.


Instructions for updating:
To construct input pipelines, use the `tf.data` module.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
Use `tf.cast` instead.


Instructions for updating:
Use `tf.cast` instead.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


Instructions for updating:
To construct input pipelines, use the `tf.data` module.


Instructions for updating:
To construct input pipelines, use the `tf.data` module.


INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpnsd5chh0/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpnsd5chh0/model.ckpt.


INFO:tensorflow:loss = 88.44333, step = 1


INFO:tensorflow:loss = 88.44333, step = 1


INFO:tensorflow:Saving checkpoints for 2 into /tmp/tmpnsd5chh0/model.ckpt.


INFO:tensorflow:Saving checkpoints for 2 into /tmp/tmpnsd5chh0/model.ckpt.


INFO:tensorflow:Loss for final step: 86.21233.


INFO:tensorflow:Loss for final step: 86.21233.


# Export artifacts

In [8]:
def serving_input_receiver_fn():
  text_input = tf.placeholder(dtype=tf.string, shape=[None])
  # embed = hub.Module("https://tfhub.dev/google/universal-sentence-encoder/2")
  # embedded_text = embed(text_input)
  feed_dict={"sentence": text_input}
  return tf.estimator.export.ServingInputReceiver(feed_dict, feed_dict)

estimator.export_savedmodel('saved_model', serving_input_receiver_fn)

Instructions for updating:
This function has been renamed, use `export_saved_model` instead.


Instructions for updating:
This function has been renamed, use `export_saved_model` instead.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.


Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.


INFO:tensorflow:Signatures INCLUDED in export for Classify: ['classification', 'serving_default']


INFO:tensorflow:Signatures INCLUDED in export for Classify: ['classification', 'serving_default']


INFO:tensorflow:Signatures INCLUDED in export for Train: None


INFO:tensorflow:Signatures INCLUDED in export for Train: None


INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']


INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']


INFO:tensorflow:Signatures INCLUDED in export for Eval: None


INFO:tensorflow:Signatures INCLUDED in export for Eval: None


INFO:tensorflow:Signatures INCLUDED in export for Regress: ['regression']


INFO:tensorflow:Signatures INCLUDED in export for Regress: ['regression']


INFO:tensorflow:Restoring parameters from /tmp/tmpnsd5chh0/model.ckpt-2


INFO:tensorflow:Restoring parameters from /tmp/tmpnsd5chh0/model.ckpt-2


INFO:tensorflow:Assets added to graph.


INFO:tensorflow:Assets added to graph.


INFO:tensorflow:Assets written to: saved_model/temp-b'1576695243'/assets


INFO:tensorflow:Assets written to: saved_model/temp-b'1576695243'/assets


INFO:tensorflow:SavedModel written to: saved_model/temp-b'1576695243'/saved_model.pb


INFO:tensorflow:SavedModel written to: saved_model/temp-b'1576695243'/saved_model.pb


b'saved_model/1576695243'

In [9]:
%%bash
readonly GCS_MODEL_DIR="gs://caip_notebooks_demo_temp/models/saved_model/"

readonly MODEL_FILE_PATH=$(find "./saved_model" | grep "saved_model.pb")
readonly LOCAL_MODEL_DIR=$(dirname "${MODEL_FILE_PATH}")
# Removing old model (just in case)
gsutil -m rm -rf "${GCS_MODEL_DIR}"
# Uploading latest model
gsutil -m cp -r "${LOCAL_MODEL_DIR}/*" "${GCS_MODEL_DIR}"

Removing gs://caip_notebooks_demo_temp/models/saved_model/train.ipynb#1576695208388675...
/ [1/1 objects] 100% Done                                                       
Operation completed over 1 objects.                                              
Copying file://./saved_model/1576695243/assets/tokens.txt [Content-Type=text/plain]...
Copying file://./saved_model/1576695243/variables/variables.data-00000-of-00002 [Content-Type=application/octet-stream]...
Copying file://./saved_model/1576695243/variables/variables.index [Content-Type=application/octet-stream]...
Copying file://./saved_model/1576695243/variables/variables.data-00001-of-00002 [Content-Type=application/octet-stream]...
Copying file://./saved_model/1576695243/saved_model.pb [Content-Type=application/octet-stream]...
==> NOTE: You are uploading one or more large file(s), which would run
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_thr