In [1]:
%matplotlib inline

保存/加载参数 - save/load model of TensorFlow
====
>Python2.7 + TensorFlow 1.2.0 backend
>
>MNIST
>在迁移学习中，<b>“参数冻结(freeze)”</b>（即参数不可变更，视为常量）和<b>“导入预训练(pre-train)网络参数”</b>（即在网络训练之前，导入训练好的网络参数，而不是初始化为随机数）是两个常用的操作。

In [2]:
# -*- coding: utf-8 -*-
#!/usr/bin/env python
'''
@author: deep learning textbook of whut
@date: 2017-10-31
'''

from __future__ import print_function

In [3]:
# Imports for this cell, grouped ahead of the data loading
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST data set from the dataset directory with one-hot labels
mnist = input_data.read_data_sets("../dataset/MNIST_data", one_hot=True)

# Training hyper-parameters
learning_rate = 1e-3   # step size handed to the optimizer
training_epochs = 15   # number of passes over the training set
batch_size = 100       # examples per mini-batch
display_step = 1       # print the average cost every `display_step` epochs

# Network layout
n_hidden_1 = n_hidden_2 = 256  # neurons in the 1st and 2nd hidden layers
n_input = 28 * 28              # MNIST data input (img shape: 28*28)
n_classes = 10                 # MNIST total classes (0-9 digits)

# Graph inputs: a batch of flattened images and their one-hot labels
X = tf.placeholder("float", shape=[None, n_input])
Y = tf.placeholder("float", shape=[None, n_classes])

# Layer weights, drawn from a normal distribution.
# The variables are created in the same order as before (h1, h2, out, then
# b1, b2, out) because later code slices tf.trainable_variables() by position.
weights = {}
weights['h1'] = tf.Variable(tf.random_normal(shape=[n_input, n_hidden_1]))
weights['h2'] = tf.Variable(tf.random_normal(shape=[n_hidden_1, n_hidden_2]))
weights['out'] = tf.Variable(tf.random_normal(shape=[n_hidden_2, n_classes]))

# Layer biases, one vector per layer
biases = {}
biases['b1'] = tf.Variable(tf.random_normal(shape=[n_hidden_1]))
biases['b2'] = tf.Variable(tf.random_normal(shape=[n_hidden_2]))
biases['out'] = tf.Variable(tf.random_normal(shape=[n_classes]))


# Create model
def multilayer_perceptron(x):
    """Build the forward pass of the fully connected network.

    The module-level ``weights`` and ``biases`` are applied layer by layer
    (two hidden layers, then the output layer). No activation function is
    applied between the layers.

    Args:
        x: input tensor; it is matrix-multiplied with ``weights['h1']``.

    Returns:
        The output-layer tensor, i.e. the unnormalised scores produced by
        ``weights['out']`` and ``biases['out']``.
    """
    hidden = x
    # Two hidden fully connected layers with 256 neurons each
    for w_key, b_key in (('h1', 'b1'), ('h2', 'b2')):
        hidden = tf.add(tf.matmul(hidden, weights[w_key]), biases[b_key])
    # Output fully connected layer with a neuron for each class
    return tf.matmul(hidden, weights['out']) + biases['out']

Extracting ../dataset/MNIST_data/train-images-idx3-ubyte.gz
Extracting ../dataset/MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../dataset/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../dataset/MNIST_data/t10k-labels-idx1-ubyte.gz


In [4]:
# Construct model
logits = multilayer_perceptron(X)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost={:.9f}".format(avg_cost))
    print("Optimization Finished!")

Epoch: 0001 cost=330.035436592
Epoch: 0002 cost=98.215009611
Epoch: 0003 cost=72.292875387
Epoch: 0004 cost=58.464913698
Epoch: 0005 cost=49.795348253
Epoch: 0006 cost=43.787151161
Epoch: 0007 cost=39.077048338
Epoch: 0008 cost=35.359896682
Epoch: 0009 cost=32.105365325
Epoch: 0010 cost=30.157992899
Epoch: 0011 cost=28.584873645
Epoch: 0012 cost=25.869300753
Epoch: 0013 cost=25.141015514
Epoch: 0014 cost=23.912697254
Epoch: 0015 cost=22.496437988
Optimization Finished!


In [5]:
# appraoch 1
with tf.Session() as sess:
    sess.run(init)
    # mkdir pare
    import os
    para_dir = 'para/'
    if os.path.exists(para_dir):
        pass
    else:
        os.mkdir(para_dir)

    import numpy as np
    # 保存变量到para目录
    for key, value in weights.items():
        np.save(file=para_dir + key + '_weights.npy', arr=value.eval())

    for key, value in biases.items():
        np.save(file=para_dir + key + '_biases.npy', arr=value.eval())

此时para目录下会有6个文件： <br>
b1_biases.npy <br>
b2_biases.npy <br>
h1_weights.npy <br>
h2_weights.npy <br>
out_biases.npy <br>
out_weights.npy <br>

In [6]:
# Load the model parameters from the .npy files and evaluate on the test set.
# `np` and `para_dir` come from the cell that wrote the files.

# Test model: evaluation ops
pred = tf.nn.softmax(logits)  # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
# Accuracy is the mean of the per-example hit/miss indicator
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

with tf.Session() as sess:
    sess.run(init)

    # Overwrite the freshly initialised variables with the stored arrays
    for params, suffix in ((weights, '_weights.npy'), (biases, '_biases.npy')):
        for key, variable in params.items():
            variable.load(np.load(para_dir + key + suffix), sess)

    test_feed = {X: mnist.test.images, Y: mnist.test.labels}
    print("Accuracy:", sess.run(accuracy, feed_dict=test_feed))

Accuracy: 0.132


In [7]:
# approach 2: write the variables to a TensorFlow checkpoint with tf.train.Saver
# NOTE(review): this opens a new session and runs `init` first, so the
# checkpoint holds newly initialised values rather than the result of the
# training loop -- presumably fine for demonstrating the Saver API, but
# confirm whether the trained values were meant to be saved.
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    saver.save(sess, save_path='./model.ckpt')

此时目录下会有4个文件： <br>
checkpoint <br>
model.ckpt.data-00000-of-00001 <br>
model.ckpt.index <br>
model.ckpt.meta <br>

In [8]:
# Load the whole model: restore the variables handled by the existing
# `saver` from the checkpoint file
with tf.Session() as sess:
    # Initialise first, then let the restore overwrite the values
    sess.run(init)
    saver.restore(sess=sess, save_path='./model.ckpt')

INFO:tensorflow:Restoring parameters from ./model.ckpt


In [9]:
# Load only selected parameters from the checkpoint

# Take the first two entries of the trainable-variable list and show them
variables_list = tf.trainable_variables()[:2]
for var in variables_list:
    print(var)

# Import part of the network parameters: this Saver is given only the
# selected variables
saver = tf.train.Saver(var_list=variables_list)

with tf.Session() as sess:
    sess.run(init)
    saver.restore(sess, './model.ckpt')

<tf.Variable 'Variable:0' shape=(784, 256) dtype=float32_ref>
<tf.Variable 'Variable_1:0' shape=(256, 256) dtype=float32_ref>
INFO:tensorflow:Restoring parameters from ./model.ckpt
