In [0]:
!pip install -q "tensorflow>=1.7.0" tensorflow-hub


In [0]:
import pandas as pd
import tensorflow as tf

In [5]:
tf.__version__

'1.6.0'

# Checkpoints

  Tensorflow provide two model formats:
  * **checkpoints**, which is a format dependent on the code that created the model.
  * **SaveModel**, which is a format independent of the code that created the model.





## prepare the Iris data

In [0]:
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',
                    'PetalLength', 'PetalWidth', 'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']

In [0]:
def maybe_download():
  train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)
  test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)
  
  return train_path, test_path

In [0]:
def load_data(y_name='Species'):
  train_path, test_path = maybe_download()
  
  train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)
  train_x, train_y = train, train.pop(y_name)
  
  test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
  test_x, test_y = test, test.pop(y_name)
  
  return (train_x, train_y), (test_x, test_y)

In [0]:
def train_input_fn(features, labels, batch_size):
  dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
  
  dataset = dataset.shuffle(1000).repeat().batch(batch_size)
  
  return dataset

In [0]:
def eval_input_fn(features, labels, batch_size):
  features = dict(features)
  
  if labels is None:
    inputs = features
  else:
    inputs = (features, labels)
    
  dataset = tf.data.Dataset.from_tensor_slices(inputs)
  
  # Batch the examples
  assert batch_size is not None, "batch_size must not be None"
  dataset = dataset.batch(batch_size)
  
  return dataset

## Estimator

In [11]:
# fetch the data
(train_x, train_y), (test_x, test_y) = load_data()

Downloading data from http://download.tensorflow.org/data/iris_training.csv

Downloading data from http://download.tensorflow.org/data/iris_test.csv



In [12]:
train_x.shape

(120, 4)

In [13]:
# Feature columns describe how to use the input
my_feature_columns = []
for key in train_x.keys():
  print(key)
  my_feature_columns.append(tf.feature_column.numeric_column(key=key))

SepalLength
SepalWidth
PetalLength
PetalWidth


In [14]:
my_feature_columns

[_NumericColumn(key='SepalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='SepalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='PetalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='PetalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [20]:
# Build 2 hidden layer DNN with 10, 10 units respectively
classifier = tf.estimator.DNNClassifier(
  feature_columns=my_feature_columns,
  hidden_units=[10, 10],
  n_classes=3,
  model_dir="models/iris")

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'models/iris', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f8829e0aa58>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [0]:
batch_size=32
train_steps=100

In [22]:
# Trin the Model
classifier.train(
  input_fn=lambda:train_input_fn(train_x, train_y,
                                 batch_size),
  steps=train_steps)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into models/iris/model.ckpt.
INFO:tensorflow:loss = 83.300064, step = 1
INFO:tensorflow:Saving checkpoints for 100 into models/iris/model.ckpt.
INFO:tensorflow:Loss for final step: 4.1511807.


<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f8829e0a978>

![chart](https://www.tensorflow.org/images/first_train_calls.png)

In [23]:
!ls -l models/iris

total 952
-rw-r--r-- 1 root root    128 Apr 16 14:06 checkpoint
-rw-r--r-- 1 root root 415745 Apr 16 14:06 events.out.tfevents.1523887587.0aacc8754a54
-rw-r--r-- 1 root root 281096 Apr 16 14:06 graph.pbtxt
-rw-r--r-- 1 root root   1552 Apr 16 14:06 model.ckpt-100.data-00000-of-00001
-rw-r--r-- 1 root root    535 Apr 16 14:06 model.ckpt-100.index
-rw-r--r-- 1 root root 125323 Apr 16 14:06 model.ckpt-100.meta
-rw-r--r-- 1 root root   1552 Apr 16 14:06 model.ckpt-1.data-00000-of-00001
-rw-r--r-- 1 root root    535 Apr 16 14:06 model.ckpt-1.index
-rw-r--r-- 1 root root 125323 Apr 16 14:06 model.ckpt-1.meta


## Default checkpoint directory

In [24]:
# Build 2 hidden layer DNN with 10, 10 units respectively
classifier = tf.estimator.DNNClassifier(
  feature_columns=my_feature_columns,
  hidden_units=[10, 10],
  n_classes=3)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpynla46ux', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f8829f034e0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [25]:
# Trin the Model
classifier.train(
  input_fn=lambda:train_input_fn(train_x, train_y,
                                 batch_size),
  steps=train_steps)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpynla46ux/model.ckpt.
INFO:tensorflow:loss = 56.72892, step = 1
INFO:tensorflow:Saving checkpoints for 100 into /tmp/tmpynla46ux/model.ckpt.
INFO:tensorflow:Loss for final step: 6.737196.


<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f8829f03e48>

In [26]:
print(classifier.model_dir)

/tmp/tmpynla46ux


In [27]:
!ls -l {classifier.model_dir}

total 952
-rw-r--r-- 1 root root    128 Apr 16 14:07 checkpoint
-rw-r--r-- 1 root root 415419 Apr 16 14:07 events.out.tfevents.1523887678.0aacc8754a54
-rw-r--r-- 1 root root 281096 Apr 16 14:07 graph.pbtxt
-rw-r--r-- 1 root root   1552 Apr 16 14:07 model.ckpt-100.data-00000-of-00001
-rw-r--r-- 1 root root    535 Apr 16 14:07 model.ckpt-100.index
-rw-r--r-- 1 root root 125323 Apr 16 14:07 model.ckpt-100.meta
-rw-r--r-- 1 root root   1552 Apr 16 14:07 model.ckpt-1.data-00000-of-00001
-rw-r--r-- 1 root root    535 Apr 16 14:07 model.ckpt-1.index
-rw-r--r-- 1 root root 125323 Apr 16 14:07 model.ckpt-1.meta


## Checkpointing Frequency

By default, the Estimator save checkpoints in the `model_dir` according to the following schedule:
  * Writes a checkpoints every 10 mintues
  * Writes a checkpoint when the `train` model starts(first iteration) and completes(final iteration).
  * Retains only the 5 most recent checkpoints in the directory.
  
You can us `RunConfig` object to define the desired schedule, and post to Estimator's `config` argument

In [29]:
my_checkpointing_config = tf.estimator.RunConfig(
  save_checkpoints_secs = 20 * 60, # per 20 minutes
  keep_checkpoint_max = 10, # Retain the 10 most recent checkpoints
)

classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[10, 10],
    n_classes=3,
    model_dir='models/iris',
    config=my_checkpointing_config)
   

INFO:tensorflow:Using config: {'_model_dir': 'models/iris', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 1200, '_session_config': None, '_keep_checkpoint_max': 10, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f88270af5c0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


## Restoring your model

Each subsequent call to the Estimator's `train`, `evaluate`, or `predict` method causes the following:
The Estimator build the model's graph by running the `model_fn()`, and initializes the weights of the new model from the data stored the most recent checkpoint

![chart2](https://www.tensorflow.org/images/subsequent_calls.png)

## Avoiding a bad restoration

In [34]:
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[10, 10],
    n_classes=3,
    model_dir='models/iris')

classifier.train(
    input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100),
        steps=200)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'models/iris', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f8829e29470>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from models/iris/model.ckpt-700
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 701 into models/iris/model.

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f8829e296a0>

In [38]:
classifier2 = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[20, 20],  # Change the number of neurons in the model.
    n_classes=3,
    model_dir='models/iris')

classifier2.train(
    input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100),
        steps=200)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'models/iris', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f8827d8c978>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from models/iris/model.ckpt-1100


InvalidArgumentError: ignored

In [39]:
# Evaluate the model
eval_result = classifier.evaluate(
  input_fn=lambda:eval_input_fn(test_x, test_y,
                                batch_size))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-04-16-14:22:56
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from models/iris/model.ckpt-1100
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-04-16-14:22:56
INFO:tensorflow:Saving dict for global step 1100: accuracy = 1.0, average_loss = 0.0509544, global_step = 1100, loss = 1.528632


In [0]:
eval_result

{'accuracy': 0.96666664,
 'average_loss': 0.09250528,
 'global_step': 300,
 'loss': 2.7751584}

In [0]:
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))



Test set accuracy: 0.967



In [0]:
# Generate predictions from the model
expected = ['Setosa', 'Versicolor', 'Virginica']
predict_x = {
    'SepalLength': [5.1, 5.9, 6.9],
    'SepalWidth': [3.3, 3.0, 3.1],
    'PetalLength': [1.7, 4.2, 5.4],
    'PetalWidth': [0.5, 1.5, 2.1],
}

In [0]:
predictions = classifier.predict(
  input_fn=lambda:eval_input_fn(predict_x,
                                labels=None,
                                batch_size=batch_size))

In [0]:
template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"')
for pred_dict, expec in zip(predictions, expected):
  class_id = pred_dict['class_ids'][0]
  probability = pred_dict['probabilities'][class_id]
  
  print(template.format(SPECIES[class_id],
                        100 * probability, expec))
  

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpjwxbnbgp/model.ckpt-300
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.

Prediction is "Setosa" (99.6%), expected "Setosa"

Prediction is "Versicolor" (97.2%), expected "Versicolor"

Prediction is "Virginica" (92.3%), expected "Virginica"
