[toc]

# Tensorflow Estimator

In [1]:
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf

## 定义模型

In [2]:
def create_model(features, feature_columns, hiddens, output_dim):
    inputs = tf.feature_column.input_layer(features=features, feature_columns=feature_columns)

    for hidden_unit in hiddens:
        inputs = tf.layers.dense(inputs=inputs, units=hidden_unit, activation=tf.nn.relu)
    logits = tf.layers.dense(inputs=inputs, units=output_dim)
    return logits

## 定义模型层

模型层是一个函数，返回一个 `tf.estimator.EstimatorSpec`

In [34]:
def model_fn_builder(lr):
    
    def model_fn(features, labels, mode, params, config):
        logits = create_model(features, params['feature_columns'], params['hiddens'], params['output_dim'])
        predict_pro  = tf.nn.softmax(logits)
        predict_cls = tf.argmax(logits, axis=1)
        if mode != tf.estimator.ModeKeys.PREDICT:
            loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,logits=logits)
        print(mode)
        def get_metric(labels, predictions):
            '''
            define metrics
            '''
            accuracy = tf.metrics.accuracy(labels=labels, 
                                           predictions=predictions, 
                                           name='iris_accuracy')
            recall = tf.metrics.recall(labels=labels,
                                       predictions=predictions,
                                       name='iris_recall')
            precision, precision_update=tf.metrics.precision(labels=labels,predictions=predictions,name='iris_precision')
            
            return {
                'accuracy':accuracy,
                'recall': recall,
                'precision':(precision,precision_update)                  
            }

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer(lr).minimize(loss=loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op,
                                              eval_metric_ops=get_metric(labels,predict_cls))
        
        elif mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              eval_metric_ops=get_metric(labels,predict_cls))
        
        elif mode == tf.estimator.ModeKeys.PREDICT or mode == tf.estimator.ModeKeys.INFER:
            predictions={'predict_cls':predict_cls,
                         'predict_pro':predict_pro}
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)  
    return model_fn

In [4]:
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

def downloadfiles():
    train_path = tf.keras.utils.get_file(fname=r'./data', origin=TRAIN_URL)
    test_path = tf.keras.utils.get_file(fname=r'./data', origin=TEST_URL)
    return train_path, test_path

train_path,test_path = downloadfiles()
print("train_path: {}\ntest_path: {}".format(train_path, test_path))

train_path: /Users/bytedance/.keras/datasets/./data
test_path: /Users/bytedance/.keras/datasets/./data


In [5]:
BATCH_SIZE = 16
EPOCHS = 400
STEPS = 40
LR = 0.0001

## 定义输入层

输出层是一个函数，返回 dataset

In [6]:
CSV_TYPES=[[0.0], [0.0], [0.0], [0.0], [0]]
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',
                    'PetalLength', 'PetalWidth', 'label']
label = ['Setosa', 'Versicolor', 'Virginica']

def input_fn_builder(file_path, epochs, batch_size, istrain=False):
    
    def parse_line(line): # 这个给 map 函数用来解析行
        '''
        parse csv line to features fromat
        '''
        fileds = tf.decode_csv(line,record_defaults=CSV_TYPES)
        features = dict(zip(CSV_COLUMN_NAMES,fileds))
        label = features.pop('label')
        return features,label
    
    def input_fn():
        dataset = tf.data.TextLineDataset(file_path).skip(1)
        dataset = dataset.map(parse_line)
        if istrain:
            dataset = dataset.shuffle(1000)
        dataset = dataset.repeat(epochs).batch(batch_size)
        return dataset # 返回的 顺序要和 model_fn一致 或者 dataset元素 格式为（features,label）元组 也可以
    
    return input_fn

In [7]:
model_dir = r'./model'
params = {}
feature_columns = []
for i in range(len(CSV_COLUMN_NAMES)-1):
    feature_columns.append(
        tf.feature_column.numeric_column(CSV_COLUMN_NAMES[i])
    )
params['feature_columns'] = feature_columns
params['hiddens'] = [128, 256, 256]
params['output_dim'] = len(label)

In [32]:
config = tf.estimator.RunConfig(save_checkpoints_steps=100)
estimator = tf.estimator.Estimator(
    model_fn=model_fn_builder(LR), # 这里需要一个函数
    model_dir=model_dir, 
    params=params,
    config=config)

INFO:tensorflow:Using config: {'_model_dir': './model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 100, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff789f2b650>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [9]:
train = estimator.train(input_fn=input_fn_builder(file_path=train_path,
                                                    batch_size=BATCH_SIZE,
                                                    epochs=EPOCHS), # 这里也需要一个函数
                        steps=STEPS)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.

INFO:tensorflow:Calling model_fn.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Rest

### evaluate


In [10]:
# evaluate(  input_fn,    steps=None,    hooks=None,    checkpoint_path=None,    name=None)
estimator.evaluate(input_fn=input_fn_builder(file_path=test_path,
                                            batch_size=BATCH_SIZE,
                                            epochs=EPOCHS), steps=STEPS)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-01-26T18:39:14Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model/model.ckpt-80
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [4/40]
INFO:tensorflow:Evaluation [8/40]
INFO:tensorflow:Evaluation [12/40]
INFO:tensorflow:Evaluation [16/40]
INFO:tensorflow:Evaluation [20/40]
INFO:tensorflow:Evaluation [24/40]
INFO:tensorflow:Evaluation [28/40]
INFO:tensorflow:Evaluation [32/40]
INFO:tensorflow:Evaluation [36/40]
INFO:tensorflow:Evaluation [40/40]
INFO:tensorflow:Finished evaluation at 2021-01-26-18:39:14
INFO:tensorflow:Saving dict for global step 80: accuracy = 0.7109375, global_step = 80, loss = 0.60199016, precision = 1.0, recall = 1.0
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 80: ./model/model.ckpt-80


{'accuracy': 0.7109375,
 'loss': 0.60199016,
 'precision': 1.0,
 'recall': 1.0,
 'global_step': 80}

### predict 

In [11]:
# predict(    input_fn,    predict_keys=None,    hooks=None,    checkpoint_path=None,    yield_single_examples=True)
estimator.predict(...)

<generator object Estimator.predict at 0x7ff7999ac750>

## serving

tensorflow 使用 pb 模型格式作为 serving 的模型。而 train 和 test 还都是 checkpoint 格式的数据，需要将我们 train 出来的 checkpoint 格式的数据转换为 pb 格式的数据。

`tf.estimator` 提供了 `tf.estimator.export_savedmodel` 这个函数来实现上面的功能，它做了下面的几件事

1. 增加placeholders到graph中，serving系统在获得inference请求时会进行feed数据

2. 增加了额外ops：可以将原有输入格式的数据转换成模型所需特征tensors

### 定义 serving 层

#### tf.estimator.export.ServingInputReceiver

In [31]:
def serving_input_receiver_fn():
    input_str = tf.placeholder(tf.string,name='inputs')
    
    # 在这里的处理方式，根据输入的不同，处理方式 会不同，我这里只是demo
    line = tf.string_split(input_str,',').values 
    features = {
      'SepalLength': tf.string_to_number([line[0]], tf.float32),
      'SepalWidth': tf.string_to_number([line[1]], tf.float32),
      'PetalLength':  tf.string_to_number([line[2]], tf.float32),
      'PetalWidth': tf.string_to_number([line[3]], tf.float32)
    }   
    
    receiver_tensors = {'inputs': input_str}

    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

上面的例子中，有 receiver_tensors 和 features，其中 reciever_tensors 是我们的输入，而 reciever_tensors 是模型的输入。 `serving_input_receiver_fn` 的第二个作用就是编写将 receiver_tensors 变成 features 的逻辑。

#### tf.estimator.export.build_raw_serving_input_receiver_fn

如果我们的输入不需要经过处理，那么可以简单的使用 `tf.estimator.export.build_raw_serving_input_receiver_fn` 函数

In [38]:
def raw_serving_input_fn():
    SepalLength = tf.placeholder(tf.float32, [None], name='SepalLength')
    SepalWidth = tf.placeholder(tf.float32, [None], name='SepalWidth')
    PetalLength = tf.placeholder(tf.float32, [None], name='PetalLength')
    PetalWidth = tf.placeholder(tf.float32, [None], name='PetalWidth')
    input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
        'SepalLength': SepalLength,
        'SepalWidth': SepalWidth,
        'PetalLength': PetalLength,
        'PetalWidth': PetalWidth,
    })()
    return input_fn

### 导出模型

In [44]:
estimator.export_savedmodel('export_base/iris', serving_input_receiver_fn=raw_serving_input_fn)

INFO:tensorflow:Calling model_fn.
infer
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['serving_default']
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:Restoring parameters from ./model/model.ckpt-80
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: export_base/iris/temp-b'1611673453'/saved_model.pb


b'export_base/iris/1611673453'

可以看到，export_base/iris 目录下多了一个 1611673453 目录，这个目录中存放这 pb 文件和 variables 文件

In [55]:
!tree export_base/iris

[01;34mexport_base/iris[00m
└── [01;34m1611673453[00m
    ├── saved_model.pb
    └── [01;34mvariables[00m
        ├── variables.data-00000-of-00001
        └── variables.index

2 directories, 3 files


### 使用 saved_model_cli

In [48]:
!saved_model_cli show --dir export_base/iris/1611673453 --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['PetalLength'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1)
        name: PetalLength_1:0
    inputs['PetalWidth'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1)
        name: PetalWidth_1:0
    inputs['SepalLength'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1)
        name: SepalLength_1:0
    inputs['SepalWidth'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1)
        name: SepalWidth_1:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['predict_cls'] tensor_info:
        dtype: DT_INT64
        shape: (-1)
        name: ArgMax:0
    outputs['predict_pro'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 3)
        name: Softmax:0
  Method name is: tensorflow/serving/predict


使用 saved_model_cli 还可以用一组输入进行测试

In [49]:
!saved_model_cli run --dir export_base/iris/1611673453 \
    --tag_set serve \
    --signature_def "serving_default" \
    --input_expr 'SepalLength=[5.1,5.9,6.9];SepalWidth=[3.3,3.0,3.1];PetalLength=[1.7,4.2,5.4];PetalWidth=[0.5,1.5,2.1]'

2021-01-26 23:06:52.487617: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
2021-01-26 23:06:52.499550: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fa801fbb830 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-01-26 23:06:52.499577: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
Result for output key predict_cls:
[0 2 2]
Result for output key predict_pro:
[[0.66284543 0.18443526 0.15271927]
 [0.17048366 0.37887084 0.45064554]
 [0.08769966 0.36253315 0.54976714]]


In [None]:
saved_model_cli run --dir intent_model_correct-1.savedmodel/1611676053 \
    --tag_set serve \
    --signature_def "serving_default" \
    --input_expr 'input_ids=[[1,2,3,4,5,6,7,8,9,10]]'

In [14]:
model_path = 'export_base/iris/1608121703'
# 2. 使用 tornado/flask
# steps：
# 1. load model
predictor = tf.contrib.predictor.from_saved_model(model_path) # model_path必须指定具体的版本号

# 2. predict
predict_result = predictor(input_params) # input_params 格式必须 符合 serving_input_receiver_fn中入参
                                        #     predict_result 格式和 model_fn中返回格式一致
# 3. using tornado
class b_vxHandler(tornado.web.RequestHandler): 

    def post(self, version):
        try:
            predict_result = predictor(input_params)
        except BaseException as err:
            self.finish(....)


application = tornado.web.Application([
    (r"/b/(?P<version>v\d+)", b_vxHandler),
])


if __name__ == "__main__":
    # tornado.options.parse_command_line()
    application.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()

SyntaxError: invalid syntax (<ipython-input-14-fd5f86d07d1f>, line 17)

# References
1. [《Estimator工程实现》系列三： SavedModel模型保存导出示例 - 简书](https://www.jianshu.com/p/72058da4d7f7)

2. [tensorflow中模型的保存与使用总结 — carlos9310](https://carlos9310.github.io/2019/10/13/tensorflow-model-save-use/#run)