In [1]:
import tensorflow as tf
import numpy as np
# 查询系统可用的 GPU
physical_devices = tf.config.experimental.list_physical_devices('GPU')
# 确保有可用的 GPU 如果没有, 则会报错
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
# 设置参数,该段务必在运行jupyter的第一段代码执行，否则会无法初始化成功
# 仅在需要时申请显存空间（程序初始运行时消耗很少的显存，随着程序的运行而动态申请显存）
tf.config.experimental.set_memory_growth(physical_devices[0], True)

#### 一、Keras版本模型的保存与加载

1. 保存模型权重（model.save_weights)

2. 保存整个模型
- 保存HDF5文件（model.save)
- 保存pb文件（tf.saved_model)
二者的区别在于saved_model格式的模型可直接用于预测，但是save_model没有保存优化器的配置

##### 保存完整的模型有很多应用场景，比如在浏览器中使用tensorflow.js加载运行，比如在移动设备中使用tensorflow lite 加载运行

3. 保存模型与加载模型的案例

In [2]:
x_train = np.random.random((1000,32))
y_train = np.random.randint(10,size =(1000,))
x_val = np.random.random((200,32))
y_val = np.random.randint(10,size = (200,))
x_test = np.random.random((200,32))
y_test = np.random.randint(10,size = (200,))

In [5]:
def get_uncompiled_model():
    inputs = tf.keras.Input(shape = (32,), name = 'digits')
    x = tf.keras.layers.Dense(64, activation= 'relu', name = 'dense_1')(inputs)
    x = tf.keras.layers.Dense(64, activation= 'relu', name = 'dense_2')(x)
    outputs = tf.keras.layers.Dense(10, name = 'predictions')(x)
    model = tf.keras.Model(inputs = inputs, outputs = outputs)
    return model

def get_compiled_model():
    model = get_uncompiled_model()
    model.compile(optimizer = tf.keras.optimizers.RMSprop(learning_rate = 1e-3),  # 编译模型添加了优化器和损失函数定义
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits= True),
    metrics = ['sparse_categorical_accuracy']
    )
    return model

In [7]:
model = get_compiled_model()
model.fit(x_train, y_train, batch_size = 32, epochs = 5, validation_data = (x_val, y_val))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f9fc80aa100>

3.1 方法一的模型保存方式

In [5]:
model.save_weights('adasd.h5')
model.load_weights('adasd.h5')
model.predict(x_test)

array([[ 0.08651663, -0.13452132,  0.22873767, ...,  0.14558567,
        -0.00869111, -0.0766616 ],
       [ 0.01916686, -0.11421013,  0.00244431, ..., -0.24580176,
         0.40502584, -0.08497936],
       [ 0.0091402 , -0.12411858,  0.07011697, ...,  0.07928359,
         0.20610042, -0.05843626],
       ...,
       [-0.13298033, -0.19477229,  0.03819231, ..., -0.09306011,
         0.27247036, -0.06529197],
       [-0.23828475, -0.18567482,  0.42255732, ...,  0.11183605,
        -0.03134666, -0.21639194],
       [ 0.04447995, -0.17535129,  0.19586767, ...,  0.11469207,
         0.1354679 , -0.03459841]], dtype=float32)

In [6]:
model.save_weights('./checkpoints/mannul_checkpoint')

In [7]:
model.load_weights('./checkpoints/mannul_checkpoint')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fb08046aa90>

In [8]:
model.predict(x_test)

array([[ 0.08651663, -0.13452132,  0.22873767, ...,  0.14558567,
        -0.00869111, -0.0766616 ],
       [ 0.01916686, -0.11421013,  0.00244431, ..., -0.24580176,
         0.40502584, -0.08497936],
       [ 0.0091402 , -0.12411858,  0.07011697, ...,  0.07928359,
         0.20610042, -0.05843626],
       ...,
       [-0.13298033, -0.19477229,  0.03819231, ..., -0.09306011,
         0.27247036, -0.06529197],
       [-0.23828475, -0.18567482,  0.42255732, ...,  0.11183605,
        -0.03134666, -0.21639194],
       [ 0.04447995, -0.17535129,  0.19586767, ...,  0.11469207,
         0.1354679 , -0.03459841]], dtype=float32)

3.2 方法二的模型保存方式

In [9]:
model.save('path_to_saved_model',save_format='tf')
new_model = tf.keras.models.load_model('path_to_saved_model')
new_model.predict(x_test)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: path_to_saved_model/assets


array([[ 0.08651663, -0.13452132,  0.22873767, ...,  0.14558567,
        -0.00869111, -0.0766616 ],
       [ 0.01916686, -0.11421013,  0.00244431, ..., -0.24580176,
         0.40502584, -0.08497936],
       [ 0.0091402 , -0.12411858,  0.07011697, ...,  0.07928359,
         0.20610042, -0.05843626],
       ...,
       [-0.13298033, -0.19477229,  0.03819231, ..., -0.09306011,
         0.27247036, -0.06529197],
       [-0.23828475, -0.18567482,  0.42255732, ...,  0.11183605,
        -0.03134666, -0.21639194],
       [ 0.04447995, -0.17535129,  0.19586767, ...,  0.11469207,
         0.1354679 , -0.03459841]], dtype=float32)

3.3 方法三的模型保存方式

In [10]:
model.save('path_to_my_model.h5')
new_model = tf.keras.models.load_model('path_to_my_model.h5')
new_model.predict(x_test)

array([[ 0.08651663, -0.13452132,  0.22873767, ...,  0.14558567,
        -0.00869111, -0.0766616 ],
       [ 0.01916686, -0.11421013,  0.00244431, ..., -0.24580176,
         0.40502584, -0.08497936],
       [ 0.0091402 , -0.12411858,  0.07011697, ...,  0.07928359,
         0.20610042, -0.05843626],
       ...,
       [-0.13298033, -0.19477229,  0.03819231, ..., -0.09306011,
         0.27247036, -0.06529197],
       [-0.23828475, -0.18567482,  0.42255732, ...,  0.11183605,
        -0.03134666, -0.21639194],
       [ 0.04447995, -0.17535129,  0.19586767, ...,  0.11469207,
         0.1354679 , -0.03459841]], dtype=float32)

 3.4 方法四的模型保存方式,常用于模型的部署上使用

In [14]:
tf.saved_model.save(model,'tf_saved_model_version')
restored_saved_model = tf.saved_model.load('tf_saved_model_version')

INFO:tensorflow:Assets written to: tf_saved_model_version/assets


In [15]:
f = restored_saved_model.signatures['serving_default']

In [16]:
f(digits = tf.constant(x_test.tolist()))

{'predictions': <tf.Tensor: shape=(200, 10), dtype=float32, numpy=
 array([[ 0.08651663, -0.13452132,  0.22873767, ...,  0.14558567,
         -0.00869111, -0.0766616 ],
        [ 0.01916686, -0.11421013,  0.00244431, ..., -0.24580176,
          0.40502584, -0.08497936],
        [ 0.0091402 , -0.12411858,  0.07011697, ...,  0.07928359,
          0.20610042, -0.05843626],
        ...,
        [-0.1329803 , -0.19477235,  0.03819229, ..., -0.09306011,
          0.27247036, -0.06529202],
        [-0.23828472, -0.18567485,  0.42255732, ...,  0.11183609,
         -0.03134666, -0.2163919 ],
        [ 0.04447995, -0.17535134,  0.19586769, ...,  0.11469213,
          0.13546787, -0.03459835]], dtype=float32)>}

In [17]:
!saved_model_cli show --dir tf_saved_model_version --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['__saved_model_init_op']:
  The given SavedModel SignatureDef contains the following input(s):
  The given SavedModel SignatureDef contains the following output(s):
    outputs['__saved_model_init_op'] tensor_info:
        dtype: DT_INVALID
        shape: unknown_rank
        name: NoOp
  Method name is: 

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['digits'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 32)
        name: serving_default_digits:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['predictions'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 10)
        name: StatefulPartitionedCall:0
  Method name is: tensorflow/serving/predict
Instructions for updating:
If using Keras pass *_constraint arguments to layers.

Defined Functions:
  Function Name: '__call__'
    Opt

#### 二、自定义版本模型的保存与加载

1. 保存checkpoint模型权重

2. 保存整个模型
- 保存HDF5文件（model.save)
- 保存pb文件（tf.saved_model)
二者的区别在于saved_model格式的模型可直接用于预测，但是save_model没有保存优化器的配置

In [22]:
class MyModel(tf.keras.Model):
    def __init__(self, num_classes = 10):
        super(MyModel,self).__init__(name = 'my_model')
        self.num_classes = num_classes
        # 定义自己需要的层
        self.dense_1 = tf.keras.layers.Dense(32, activation = 'relu')
        self.dense_2 = tf.keras.layers.Dense(num_classes)
        
    @tf.function(input_signature = [tf.TensorSpec([None,32],tf.float32,name = 'inputs')])
    def call(self, inputs):
        # 定义前向传播
        # 使用在（__init__)z中定义的层
        x= self.dense_1(inputs)
        return self.dense_2(x)

In [24]:
x_train = np.random.random((1000,32))
y_train = np.random.random((1000,10))
x_val = np.random.random((200,32))
y_val = np.random.random((200,10))
x_test = np.random.random((200,32))
y_test = np.random.random((200,10))

In [28]:
# 优化器
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)

In [33]:
# 定义损失函数
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

In [34]:
# 定义评估函数
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
val_acc_metric = tf.keras.metrics.CategoricalAccuracy() 

In [35]:
# 准备训练数据集
batch_size = 64
train_dataset = tf.data.Dataset.from_tensor_slices((x_train,y_train))
train_dataset = train_dataset.shuffle(buffer_size = 1024).batch(batch_size)

In [37]:
# 准备测试数据集
val_dataset = tf.data.Dataset.from_tensor_slices((x_val,y_val))
val_dataset = val_dataset.batch(64)

In [43]:
model = MyModel(num_classes = 10)
epochs = 3
for epoch in range(epochs):
    print('start of epoch %d'  % (epoch,))
    
    # 遍历数据集的batch_size
    for step,(x_batch_train,y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            logits = model(x_batch_train)
            loss_value = loss_fn(y_batch_train,logits)
        grads = tape.gradient(loss_value,model.trainable_weights)
        optimizer.apply_gradients(zip(grads,model.trainable_weights))
        
        # 更新训练集的metrics
        train_acc_metric(y_batch_train,logits)
        
        # 每200 batch_size 打印一次结果
        if step%200 == 0:
            print('training loss (for one batch) at step %s:%s' %(step,float(loss_value)))
            print('seen so far: %s samples' % ((step + 1) * 64))
        
        # 在每一个epoch结束的时候显示metrics
        train_acc = train_acc_metric.result()
        print('training acc over epoch: %s' % (float(train_acc),))
        # 在每个epoch结束时重置训练指标
        train_acc_metric.reset_states()
        
        #在每个epoch结束时运行一个验证集
        for x_batch_val,y_batch_val in val_dataset:
            val_logits = model(x_batch_val)
            # 更新验证集metric
            val_acc_metric(y_batch_val,val_logits)
        val_acc = val_acc_metric.result()
        val_acc_metric.reset_states()
        print('validation acc: %s' % (float(val_acc),))

start of epoch 0


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

training loss (for one batch) at step 0:11.792417526245117
seen so far: 64 samples
training acc over epoch: 0.09301051497459412
validation acc: 0.11999999731779099
training acc over epoch: 0.171875
validation acc: 0.11999999731779099
training acc over epoch: 0.078125
validation acc: 0.125
training acc over epoch: 0.078125
validation acc: 0.12999999523162842
training acc over epoch: 0.125
validation acc: 0.12999999523162842
training acc over epoch: 0.078125
validation acc: 0.125
training acc over epoch: 0.109375
validation acc: 0.125
training acc over epoch: 0.078125
validation acc: 0.125
training acc over epoch: 0.125
validation acc: 0.11999999731779099
training acc over ep

#### 自定义模型的保存方法一

其实和上面的举例的四个方法是一致的，但是方法二是不行的，不能保存为h5格式

In [44]:
# 此处不再赘述