# 目录
## 1. 导入模块
## 2. 获取minist数据集
## 3. 定义输入数据格式的feature_columns
## 4. tf.estimator.inputs.numpy_input_fn可迭代的数据函数
> 存在一个 tf.estimator.inputs.numpy_input_fn
## 5. 自定义estimator
### 5.1 定义网络函数
### 5.2 实例化estimator
## 6. 训练
## 7. 测试

## 1. 导入模块

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import sklearn

from tensorflow import keras
import tensorflow as tf
import sys
import os
import time
import datetime

for module in [np, pd, mpl, sklearn, keras, tf]:
    print(module.__name__, module.__version__)

numpy 1.17.2
pandas 0.25.1
matplotlib 3.1.1
sklearn 0.21.3
tensorflow.python.keras.api._v1.keras 2.2.4-tf
tensorflow 1.15.0


## 2. 获取minist数据集

In [2]:
from tensorflow.examples.tutorials.mnist import input_data

tf.logging.set_verbosity(tf.logging.INFO)

mnist = input_data.read_data_sets("./data/MNIST_data", one_hot=False)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./data/MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting ./data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ./data/MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


## 3. 定义输入数据格式的feature_columns

In [3]:
feature_columns = [tf.feature_column.numeric_column("images", shape=[784])]

## 4. tf.estimator.inputs.numpy_input_fn可迭代的数据函数

In [4]:
train_data_fn = tf.estimator.inputs.numpy_input_fn(
    x={"images": mnist.train.images},
    y=mnist.train.labels.astype(np.int32),
    shuffle=True,
    num_epochs=None,
    batch_size=32
)

test_data_fn = tf.estimator.inputs.numpy_input_fn(
    x={"images": mnist.test.images},
    y=mnist.test.labels.astype(np.int32), # 网络结构中，labels 只能接受 int32 和int64
    shuffle=False,
    num_epochs=1,
    batch_size=32
)

## 5. 自定义estimator

### 5.1 定义网络函数

In [14]:
def lenet(images, is_training):
    images = tf.reshape(images, shape=[-1, 28, 28, 1])
    
    conv1 = tf.layers.conv2d(images, 32, 5, activation=tf.nn.relu)
    conv1 = tf.layers.max_pooling2d(conv1, 2, 2)
    
    conv2 = tf.layers.conv2d(conv1, 64, 3, activation=tf.nn.relu)
    conv2 = tf.layers.max_pooling2d(conv2, 2, 2)
    
    fc1 = tf.layers.flatten(conv2)
    fc1 = tf.layers.dense(fc1, 1024)
    fc1 = tf.layers.dropout(fc1, rate=0.5, training=is_training)
    fc1 = tf.layers.dense(fc1, 10)
    
    return fc1

def model_fn(features, labels, mode, params):
    '''
    features 是一个字典， "images"--> [7854]
    '''
    ## 网络推理
    logits = lenet(features["images"], mode == tf.estimator.ModeKeys.TRAIN)
    predictions = tf.argmax(logits, axis=1)
    
    ## predict
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                "logits": logits,
                "probabilities": tf.nn.softmax(logits),
                "class_ids": predictions[:, tf.newaxis]
            }
        )
    
    ## eval
    loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)
    accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions, name="acc_op")
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            eval_metric_ops={"accuracy": accuracy}
        )
    
    ## train
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss=loss, global_step=tf.train.get_global_step())
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op
        )
    

### 5.2 实例化estimator

In [15]:
output_dir = "tf1_customized_estimator_model"
if not os.path.exists(output_dir):
    os.mkdir(output_dir)

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir=output_dir,
    params={
        "feature_columns": feature_columns,
    }
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'tf1_customized_estimator_model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fdc333440b8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


## 6. 训练

In [16]:
trsin_result = estimator.train(
    input_fn=train_data_fn,
    steps=10000
)

INFO:tensorflow:Calling model_fn.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Saving checkpoints for 0 into tf1_customized_estimator_model/model.ckpt.
INFO:tensorflow:loss = 2.3386955, step = 0
INFO:tensorflow:global_step/sec: 369.075
INFO:tensorflow:loss = 0.2152664, step = 100 (0.271 sec)
INFO:tensorflow:global_step/sec: 492.809
INFO:tensorflow:loss = 0.03358087, step = 200 (0.203 sec)
INFO:tensorflow:global_step/sec: 490.62
INFO:tensorflow:loss = 0.12010437, step = 300 (0.204 sec)
INFO:tensorflow:global_step/sec: 493.747
INFO:tensorflow:loss = 0.12110722, step = 400 (0.203 sec)
INFO:tensorflow:global_step/sec: 491.944
INFO:tensorflow:los

INFO:tensorflow:global_step/sec: 483.082
INFO:tensorflow:loss = 0.022228446, step = 7200 (0.207 sec)
INFO:tensorflow:global_step/sec: 486.302
INFO:tensorflow:loss = 0.00013212554, step = 7300 (0.206 sec)
INFO:tensorflow:global_step/sec: 484.349
INFO:tensorflow:loss = 0.008270181, step = 7400 (0.206 sec)
INFO:tensorflow:global_step/sec: 483.729
INFO:tensorflow:loss = 0.00014045695, step = 7500 (0.207 sec)
INFO:tensorflow:global_step/sec: 484.52
INFO:tensorflow:loss = 0.00015227625, step = 7600 (0.206 sec)
INFO:tensorflow:global_step/sec: 483.84
INFO:tensorflow:loss = 0.00012691527, step = 7700 (0.207 sec)
INFO:tensorflow:global_step/sec: 483.893
INFO:tensorflow:loss = 0.011763247, step = 7800 (0.207 sec)
INFO:tensorflow:global_step/sec: 484.166
INFO:tensorflow:loss = 0.004361762, step = 7900 (0.207 sec)
INFO:tensorflow:global_step/sec: 485.337
INFO:tensorflow:loss = 0.008013714, step = 8000 (0.206 sec)
INFO:tensorflow:global_step/sec: 478.87
INFO:tensorflow:loss = 0.00070219307, step = 

In [17]:
trsin_result

<tensorflow_estimator.python.estimator.estimator.Estimator at 0x7fdc333445c0>

## 7. 测试

In [18]:
result = estimator.evaluate(input_fn=test_data_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-01-23T16:14:48Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from tf1_customized_estimator_model/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2020-01-23-16:14:49
INFO:tensorflow:Saving dict for global step 10000: accuracy = 0.9905, global_step = 10000, loss = 0.03241456
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 10000: tf1_customized_estimator_model/model.ckpt-10000


In [19]:
result

{'accuracy': 0.9905, 'loss': 0.03241456, 'global_step': 10000}

## 8. 预测

In [27]:
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"images": mnist.test.images[:5]},
    num_epochs=1,
    shuffle=False
)

predict_result = estimator.predict(input_fn=predict_input_fn)

In [28]:
for item in predict_result:
    print(item["logits"])
    print(item["probabilities"])
    print(item["class_ids"])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from tf1_customized_estimator_model/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
[ -1.8492608   -6.195212     4.2140293   -1.1713935   -5.965131
 -10.141287    -8.888818    26.20546      0.82345736  -0.1283355 ]
[6.5462100e-13 8.4833340e-15 2.8134756e-10 1.2893873e-12 1.0678015e-14
 1.6398637e-16 5.7378525e-16 1.0000000e+00 9.4784345e-12 3.6591307e-12]
[7]
[  2.4726052    3.0356703   25.621538    -0.69463056  -4.446682
 -17.964666     0.5203086   -2.2105596   -2.2382793   -5.4442    ]
[8.8419071e-11 1.5526794e-10 1.0000000e+00 3.7242045e-12 8.7405330e-14
 1.1769334e-19 1.2550900e-11 8.1784953e-13 7.9549052e-13 3.2234513e-14]
[2]
[-2.6458664   8.36285    -3.0758567  -1.8377589   0.39156744 -2.582029
  2.576137    2.5318596  -0.5712558  -1.8195261 ]
[1.6448190e-05 9.9344301e-01 1.0699795e-