TensorFlow 的保存和恢复功能十分方便，也十分强大。<br/>
我们既可以保存/恢复变量，也可以保存/恢复整个模型。<br/>
下面是几种保存/恢复的方法

# 1、保存恢复变量

In [1]:
"""
保存所有变量
"""
import tensorflow as tf
from features import utils
import os

# Two zero-initialised variables that will be written to the checkpoint.
v1 = tf.get_variable('v1', shape=[3], initializer=tf.zeros_initializer)
v2 = tf.get_variable('v2', shape=[5], initializer=tf.zeros_initializer)

# Ops that move the values away from zero so the saved state is recognisable
# when it is restored later.
inc_v1 = v1.assign(v1 + 1)
dec_v2 = v2.assign(v2 - 1)

# Op that initialises every global variable.
init_op = tf.global_variables_initializer()

# A Saver built without arguments covers the variables defined above.
saver = tf.train.Saver()

checkpoint_path = os.path.join(utils.localDir(), './tmp/save_restore_variable/model.ckpt')
# The checkpoint's parent directory has to exist before saving; create it here
# so it no longer needs to be prepared by hand.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

with tf.Session() as sess:
    # Initialise the variables, then apply the increment / decrement once.
    sess.run(init_op)
    inc_v1.op.run()
    dec_v2.op.run()
    # Write the current variable values to the checkpoint.
    save_path = saver.save(sess, checkpoint_path)
    print("model saved in path:%s " % save_path)


model saved in path:/Users/beiyan/Documents/Projects/machine_learning/features/./tmp/save_restore_variable/model.ckpt 


In [2]:
"""
恢复所有变量
"""
tf.reset_default_graph()

# Re-declare the variables under the names used in the checkpoint; no
# initializer is given because the values are read from the checkpoint.
v1 = tf.get_variable("v1", shape=[3])
v2 = tf.get_variable("v2", shape=[5])
saver = tf.train.Saver()

checkpoint_path = os.path.join(utils.localDir(), './tmp/save_restore_variable/model.ckpt')
with tf.Session() as sess:
    saver.restore(sess, checkpoint_path)
    # Fetch both restored values in a single run call.
    v1_value, v2_value = sess.run([v1, v2])
    print("v1 :%s " % v1_value)
    print("v2 :%s " % v2_value)


INFO:tensorflow:Restoring parameters from /Users/beiyan/Documents/Projects/machine_learning/features/./tmp/save_restore_variable/model.ckpt


v1 :[1. 1. 1.] 
v2 :[-1. -1. -1. -1. -1.] 


In [3]:
"""
恢复部分变量
"""
tf.reset_default_graph()
v1 = tf.get_variable("v1", [3], initializer=tf.zeros_initializer)
v2 = tf.get_variable("v2", [5], initializer=tf.zeros_initializer)

# Only v2 is handed to the Saver, so only v2 is read back from the checkpoint.
saver = tf.train.Saver({"v2": v2})

checkpoint_path = os.path.join(utils.localDir(), "./tmp/save_restore_variable/model.ckpt")
with tf.Session() as sess:
    # v1 is not covered by the saver, so it is initialised explicitly.
    sess.run(v1.initializer)
    saver.restore(sess, checkpoint_path)
    # v1 keeps its initial value; v2 holds the value from the checkpoint.
    print("v1 : %s" % sess.run(v1))
    print("v2 : %s" % sess.run(v2))


INFO:tensorflow:Restoring parameters from /Users/beiyan/Documents/Projects/machine_learning/features/./tmp/save_restore_variable/model.ckpt


v1 : [0. 0. 0.]
v2 : [-1. -1. -1. -1. -1.]


In [4]:
"""
检查某个检查点中的变量
""" 
from tensorflow.python.tools import inspect_checkpoint as chkp

model_path = os.path.join(utils.localDir(), './tmp/save_restore_variable/model.ckpt')

# One entry per listing: (banner, tensor_name, all_tensors).
# The first entry dumps every tensor, the other two a single named tensor.
listings = (
    ("-----所有变量-----", '', True),
    ("-----v1-----", 'v1', False),
    ("-----v2-----", 'v2', False),
)
for banner, tensor_name, all_tensors in listings:
    print(banner)
    chkp.print_tensors_in_checkpoint_file(model_path, tensor_name=tensor_name, all_tensors=all_tensors)


-----所有变量-----
tensor_name:  v1
[1. 1. 1.]
tensor_name:  v2
[-1. -1. -1. -1. -1.]
-----v1-----
tensor_name:  v1
[1. 1. 1.]
-----v2-----
tensor_name:  v2
[-1. -1. -1. -1. -1.]


# 2、保存和恢复模型
与只保存变量相比，保存模型更为有用。模型（model）包括：变量、图以及图的元数据。<br/>
我们最希望的情况是：可以保存一个训练好的模型，下次直接读取这个训练好的模型进行预测<br/>
tensorflow 提供 ***简单保存模型*** 以及 ***通过builder进行保存*** <br/>


## 简单保存模型
简单保存主要通过 tf.saved_model.simple_save 实现。

In [5]:
"""
保存模型
"""
import os

tf.reset_default_graph()

# Tiny linear model y = x . w + b with a constant weight and a randomly
# initialised bias variable. The tensor names 'input-x' and 'predit' are what
# the restore cell looks up, so they are kept exactly as they are.
x = tf.placeholder(dtype=tf.float32, shape=[None, 2], name='input-x')
w = tf.constant([[1], [2]], dtype=tf.float32)
b = tf.get_variable('v1', [1], dtype=tf.float32, initializer=tf.random_normal_initializer)
y = tf.add(tf.matmul(x, w), b, name='predit')

export_dir = os.path.join(utils.localDir(), './tmp/simple_save_restore/')
sample_batch = [[1, 2], [3, 4]]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    prediction = sess.run(y, feed_dict={x: sample_batch})
    print(prediction)

    # Treat the model above as already trained and export it together with
    # the aliases of its input and output tensors.
    tf.saved_model.simple_save(sess, export_dir, inputs={"x": x}, outputs={"y": y})


[[ 4.8252225]
 [10.825223 ]]
INFO:tensorflow:Assets added to graph.


INFO:tensorflow:No assets to write.


INFO:tensorflow:SavedModel written to: b'/Users/beiyan/Documents/Projects/machine_learning/features/./tmp/simple_save_restore/saved_model.pb'


In [6]:
"""
恢复应用模型
"""
# 注意 ；直接使用tf.python.saved_model.tag_constants.SERVING 会报错
from tensorflow.python.saved_model import tag_constants

tf.reset_default_graph()
path = os.path.join(utils.localDir(), './tmp/simple_save_restore/')
with tf.Session() as sess:
    # tag_constants.SERVING is the string "serve".
    serving_tags = [tag_constants.SERVING]
    tf.saved_model.loader.load(sess, serving_tags, path)

    # The tensors are fetched by the names given when the graph was built.
    graph = sess.graph
    x = graph.get_tensor_by_name('input-x:0')
    y = graph.get_tensor_by_name('predit:0')

    prediction = sess.run(y, feed_dict={x: [[1, 2], [3, 4]]})
    print(prediction)


INFO:tensorflow:Restoring parameters from b'/Users/beiyan/Documents/Projects/machine_learning/features/./tmp/simple_save_restore/variables/variables'


[[ 4.8252225]
 [10.825223 ]]


## 手动构建SavedModel

In [7]:
"""
保存模型
"""
tf.reset_default_graph()

# z = x + w, where w is a randomly initialised variable of three elements.
x = tf.placeholder(shape=[3], dtype=tf.float32, name='input-x')
w = tf.get_variable('w', shape=[3], initializer=tf.random_uniform_initializer, dtype=tf.float32)
z = tf.add(x, w, name='output-z')

# The builder writes the SavedModel into export_dir.
export_dir = os.path.join(utils.localDir(), './tmp/builder_save_restore')
builder = tf.saved_model.builder.SavedModelBuilder(export_dir)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    result = sess.run(z, feed_dict={x: [2, 3, 4]})
    print(result)
    # Register the graph and the session's variable values under the custom
    # tag 'saved_test'; the restore cell loads the model with the same tag.
    builder.add_meta_graph_and_variables(sess, ['saved_test'], strip_default_attrs=True)

# Left as the cell's final expression so its return value is still displayed.
builder.save()


[2.294889 3.687993 4.652502]
INFO:tensorflow:No assets to save.


INFO:tensorflow:No assets to write.


INFO:tensorflow:SavedModel written to: b'/Users/beiyan/Documents/Projects/machine_learning/features/./tmp/builder_save_restore/saved_model.pb'


b'/Users/beiyan/Documents/Projects/machine_learning/features/./tmp/builder_save_restore/saved_model.pb'

In [8]:
"""
恢复应用模型
"""
tf.reset_default_graph()
export_dir = os.path.join(utils.localDir(), './tmp/builder_save_restore')
with tf.Session() as sess:
    # Load the meta graph that was saved under the custom tag 'saved_test'.
    tf.saved_model.loader.load(sess, ['saved_test'], export_dir)

    # The tensors are fetched by the names given when the graph was built.
    graph = sess.graph
    x = graph.get_tensor_by_name('input-x:0')
    z = graph.get_tensor_by_name('output-z:0')

    result = sess.run(z, feed_dict={x: [2, 3, 4]})
    print(result)


INFO:tensorflow:Restoring parameters from b'/Users/beiyan/Documents/Projects/machine_learning/features/./tmp/builder_save_restore/variables/variables'


[2.294889 3.687993 4.652502]


## 使用SignatureDef 保存恢复模型
以上两种保存恢复模型的方式都有一个问题，就是需要知道 输入节点 和 输出节点 在计算图中的名字<br/>
如果是我们自己训练的模型，这样恢复也是可以的。但是如果使用别人训练好的模型，有时候我们是不知道节点的名字的。<br/>
tensorflow 提供了SignatureDef，可以使我们更方便地定义模型中的输入输出<br/>
可以理解为：SignatureDef定义了一些协议，对我们所需的信息进行封装，我们根据这套协议来获取信息，从而实现创建与使用模型的解耦。<br/>
SignatureDef，将输入输出tensor的信息都进行了封装，并且给他们一个自定义的别名，所以在构建模型的阶段，可以随便给tensor命名，只要在保存训练好的模型的时候，在SignatureDef中给出统一的别名即可。

In [9]:
"""
保存模型
"""
import numpy as np

tf.reset_default_graph()


def inference(input_data):
    """Build a small two-layer linear network and return its output tensor.

    The network is two chained matrix multiplications with no bias and no
    activation: ``input_data x w1 x w2``.

    Args:
        input_data: tensor multiplied by the [4, 3] weight ``w1``, so it is
            expected to have 4 columns.

    Returns:
        The tensor produced by the second matmul (``w2`` has shape [3, 1]).
    """
    hidden_weights = tf.get_variable(name='w1', initializer=tf.truncated_normal(shape=[4, 3], stddev=0.01))
    hidden = tf.matmul(input_data, hidden_weights)
    output_weights = tf.get_variable(name='w2', initializer=tf.truncated_normal(shape=[3, 1], stddev=0.01))
    return tf.matmul(hidden, output_weights)


def generateData():
    """Generate a synthetic regression data set of 10 samples.

    The features are random values scaled by 10 and rounded up to whole
    numbers; the targets are those features pushed through two fixed weight
    matrices.

    Returns:
        Tuple ``(x, y)`` of float32 arrays with shapes (10, 4) and (10, 1).
    """
    features = np.ceil(np.random.rand(10, 4) * 10)

    # Fixed "ground truth" weights of the two layers the model should learn.
    first_layer = np.array([
        [1., 1., 1.],
        [2., 2., 2.],
        [3., 3., 3.],
        [4., 4., 4.],
    ])
    second_layer = np.array([[1.], [2.], [3.]])

    targets = features @ first_layer @ second_layer
    return features.astype(np.float32), targets.astype(np.float32)


# Placeholders for a batch of inputs and the matching regression targets.
x = tf.placeholder(shape=[None, 4], dtype=tf.float32)
y = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# Loss and optimiser.
logit = inference(x)
# mean_squared_error takes (labels, predictions); both are passed by keyword
# so that the two roles cannot be swapped by position.
loss = tf.losses.mean_squared_error(labels=y, predictions=logit)
optimize = tf.train.GradientDescentOptimizer(0.0001).minimize(loss)

# Feed the small training set through tf.data. The 10 samples are repeated
# 100 times and served in batches of 5, which yields exactly the 200 batches
# consumed by the training loop below. No shuffling is applied.
train_data, train_label = generateData()
data = tf.data.Dataset.from_tensor_slices(train_data)
label = tf.data.Dataset.from_tensor_slices(train_label)
dataset = tf.data.Dataset.zip((data, label))
dataset = dataset.repeat(100).batch(5)
dataset_iterator = dataset.make_initializable_iterator()
next_data, next_label = dataset_iterator.get_next()

export_dir = os.path.join(utils.localDir(), './tmp/signature_save_restore')

# The signature publishes the input / output tensors under the aliases
# 'input-x' and 'output', so the loader does not need their graph names.
builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
inputs = {"input-x": tf.saved_model.utils.build_tensor_info(x)}
outputs = {"output": tf.saved_model.utils.build_tensor_info(logit)}
signature = tf.saved_model.signature_def_utils.build_signature_def(inputs=inputs, outputs=outputs,
                                                                   method_name='test_sig_name')

with tf.Session() as sess:
    # Initialise the variables and the dataset iterator.
    tf.global_variables_initializer().run()
    sess.run(dataset_iterator.initializer)

    for i in range(200):
        batch_data, batch_label = sess.run([next_data, next_label])
        _, loss_value = sess.run([optimize, loss], feed_dict={x: batch_data, y: batch_label})
        if i % 20 == 0:
            print("after step %3d ,the loss is %.6f" % (i, loss_value))

    # NOTE(review): `tags` is documented as a collection of strings, but a bare
    # string is passed here. It is kept unchanged because the loading cell
    # passes the identical bare string; switch both cells to a list together.
    builder.add_meta_graph_and_variables(sess, 'test_saved_model', {'test_signature': signature})
    builder.save()


after step   0 ,the loss is 56239.167969
after step  20 ,the loss is 15322.833984
after step  40 ,the loss is 96.053955
after step  60 ,the loss is 16.751575
after step  80 ,the loss is 6.228118
after step 100 ,the loss is 2.361595
after step 120 ,the loss is 0.908338
after step 140 ,the loss is 0.354578
after step 160 ,the loss is 0.140375
after step 180 ,the loss is 0.056296
INFO:tensorflow:No assets to save.


INFO:tensorflow:No assets to write.


INFO:tensorflow:SavedModel written to: b'/Users/beiyan/Documents/Projects/machine_learning/features/./tmp/signature_save_restore/saved_model.pb'


In [10]:
"""
恢复应用模型
"""
# Keys agreed on when the model was exported: the meta graph tag, the
# signature name, and the aliases of the input / output tensors.
meta_graph_tag = 'test_saved_model'
signature_key = 'test_signature'
input_key = 'input-x'
output_key = 'output'

saved_dir = os.path.join(utils.localDir(), './tmp/signature_save_restore')

# Start from an empty default graph, as the other restore cells do, so the
# model is not imported on top of nodes left over from an earlier cell.
tf.reset_default_graph()
with tf.Session() as sess:
    # NOTE(review): the tag is a bare string to match how the saving cell
    # passed it; `tags` is documented as a collection of strings, so switch
    # both cells to a list together.
    meta_graph_def = tf.saved_model.loader.load(sess, meta_graph_tag, saved_dir)
    signature = meta_graph_def.signature_def

    # The signature maps the public aliases to the tensor names in the graph,
    # so the graph-internal names never have to be known here.
    x_tensor_name = signature[signature_key].inputs[input_key].name
    y_tensor_name = signature[signature_key].outputs[output_key].name
    x = sess.graph.get_tensor_by_name(x_tensor_name)
    y = sess.graph.get_tensor_by_name(y_tensor_name)

    # From here on `y` holds the fetched values instead of the tensor handle.
    y = sess.run(y, feed_dict={x: [[1, 2, 3, 4], [2, 3, 4, 5]]})
    print(y)


INFO:tensorflow:Restoring parameters from b'/Users/beiyan/Documents/Projects/machine_learning/features/./tmp/signature_save_restore/variables/variables'


[[179.89954]
 [239.87839]]
