In [1]:
import tensorflow as tf
from tensorflow.python.framework import graph_util

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Checkpoint prefix to freeze: the freezing cell loads input_checkpoint + '.meta'
# for the graph structure and restores the variable values from this prefix.
input_checkpoint = "model/model.ckpt"
# Destination file for the serialized, frozen GraphDef.
output_graph = "./model.pb"

In [None]:
# Comma-separated names of the nodes that must be kept in the frozen graph.
output_node_names = "fky_input,fky_output,model_1/dense_layer_1/LogSoftmax,model_1/flatten_layer_3/Tanh"

# Recreate the graph from the checkpoint's .meta file, dropping device placements.
saver = tf.train.import_meta_graph(input_checkpoint + '.meta',
                                   clear_devices=True)
# The imported ops live in the default graph.
graph = tf.get_default_graph()
# Serialized (GraphDef) form of that graph.
input_graph_def = graph.as_graph_def()

with tf.Session() as sess:
    # Load the variable values from the checkpoint into this session.
    saver.restore(sess, input_checkpoint)

    # Freeze the model: replace variables by constants holding their current
    # values. input_graph_def is the same graph the session runs
    # (i.e. sess.graph_def).
    output_graph_def = graph_util.convert_variables_to_constants(
        sess,
        input_graph_def,
        output_node_names.split(","),
    )

    # Write the frozen graph to disk.
    with tf.gfile.GFile(output_graph, "wb") as f:
        f.write(output_graph_def.SerializeToString())

    # Debugging aid: number of op nodes in the frozen graph and a dump of
    # every operation in the source graph.
    # print("%d ops in the final graph." %
    #       len(output_graph_def.node))
    # for op in graph.get_operations():
    #     print(op.name, op.values())


In [1]:
# Convert to ckpt
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import Layer
import os

class PolicyValueNet():
    """Rebuild the policy-value network graph and re-save its checkpoint.

    Constructing an instance does all the work: it builds the network twice on
    the default graph (training mode first, then inference mode re-using the
    same variables), restores every global variable from ``in_model`` and
    immediately saves them again to ``out_model`` with ``tf.train.Saver``.

    Args:
        in_model: checkpoint path handed to ``Saver.restore``.
        out_model: checkpoint path handed to ``Saver.save``.
        board_width: board width; sizes the input placeholder and the policy head.
        board_height: board height; sizes the input placeholder and the policy head.
        cuda: when falsy, ``CUDA_VISIBLE_DEVICES`` is set to ``"-1"`` before the
            session is created so that TensorFlow does not see any GPU.
    """

    def __init__(self, in_model, out_model, board_width=15, board_height=15, cuda=False):
        self.planes_num = 9  # feature planes
        self.nb_block = 19   # resnet blocks
        if not cuda:
            # Hide all GPUs. If there are several GPUs it is better to assign
            # an explicit GPU ID instead.
            os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
            os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

        self.board_width = board_width
        self.board_height = board_height

        # Make a session
        self.session = tf.InteractiveSession()

        # 1. Input: batch of states, channels first.
        self.input_states = tf.placeholder(
            tf.float32,
            shape=[None, self.planes_num, board_height, board_width],
            name="fky_input")

        # The network is built twice. The second build passes reuse=True, so it
        # shares the variables created by the first one.
        # NOTE(review): the export code addresses the inference outputs as
        # "model_1/...", presumably because TensorFlow uniquifies the op name
        # scope on the second build - confirm against the printed tensors below.
        self.action_fc_train, self.evaluation_fc2_train = self.network(
            input_states=self.input_states, reuse=False, is_train=True)
        self.action_fc_test, self.evaluation_fc2_test = self.network(
            input_states=self.input_states, reuse=True, is_train=False)
        self.network_all_params = tf.global_variables()

        # Define the Loss function
        # 1. Label: the array containing if the game wins or not for each state
        self.labels = tf.placeholder(tf.float32, shape=[None, 1], name="fky_output")
        # 2. Predictions: the array containing the evaluation score of each state
        # which is self.evaluation_fc2

        # Global (not only trainable) variables.
        self.network_params = tf.global_variables()

        # For saving and restoring: this saver covers every variable.
        self.saver = tf.train.Saver()

        # For transfer learning use: a second saver restricted to variables
        # whose name contains one of these substrings.
        restore_keys = ('conv2d', 'resnet', 'bn', 'flatten_layer')
        self.restore_params = [
            params for params in self.network_params
            if any(key in params.name for key in restore_keys)
        ]
        self.saver_restore = tf.train.Saver(self.restore_params)

        init = tf.global_variables_initializer()
        self.session.run(init)

        # Show the inference tensors and the label placeholder; their names are
        # what a graph-freezing step has to list as output nodes.
        print(self.action_fc_test)
        print(self.evaluation_fc2_test)
        print(self.labels)

        # Convert: load the source checkpoint and write it back out.
        self.saver.restore(self.session, in_model)
        self.saver.save(self.session, out_model)

    def network(self, input_states, reuse, is_train, label=''):
        """Build the shared trunk plus the policy and value heads.

        Args:
            input_states: input tensor laid out as NCHW.
            reuse: forwarded to ``tf.variable_scope``; pass True to share the
                variables of a previous build.
            is_train: forwarded to every batch-norm layer.
            label: suffix appended to the ``'model'`` variable-scope name.

        Returns:
            Tuple ``(action_fc, evaluation_fc2)`` of output tensors: the
            log-softmax over ``board_width * board_height`` moves and the
            tanh evaluation of the state.
        """
        # Define the tensorflow neural network
        with tf.variable_scope('model' + label, reuse=reuse):
            # tl.layers.set_name_reuse(reuse)

            # NCHW->NHWC
            input_state = tf.transpose(input_states, [0, 2, 3, 1])
            inputlayer = tl.layers.InputLayer(input_state, name='input')

            # 2. Common Networks Layers
            # these layers designed by myself
            inputlayer = tl.layers.ZeroPad2d(inputlayer, 2, name='zeropad2d')
            conv1 = tl.layers.Conv2d(inputlayer,
                                     n_filter=64,
                                     filter_size=(1, 1),
                                     strides=(1, 1),
                                     padding='SAME',
                                     name='conv2d_1')
            residual_layer = self.residual_block(incoming=conv1,
                                                 out_channels=64,
                                                 is_train=is_train,
                                                 nb_block=self.nb_block)

            # 3-1 Action Networks
            # these layers are the same as paper's
            action_conv = tl.layers.Conv2d(residual_layer,
                                           n_filter=2,
                                           filter_size=(1, 1),
                                           strides=(1, 1),
                                           name='conv2d_2')
            action_conv = tl.layers.BatchNormLayer(action_conv,
                                                   act=tf.nn.relu,
                                                   is_train=is_train,
                                                   name='bn_1')
            action_conv_flat = tl.layers.FlattenLayer(action_conv,
                                                      name='flatten_layer_1')
            # 3-2 Full connected layer,
            # the output is the log probability of moves on each slot on the board
            action_fc = tl.layers.DenseLayer(action_conv_flat,
                                             n_units=self.board_width * self.board_height,
                                             act=tf.nn.log_softmax,
                                             name='dense_layer_1')

            # 4 Evaluation Networks
            # these layers are the same as paper's
            evaluation_conv = tl.layers.Conv2d(residual_layer,
                                               n_filter=1,
                                               filter_size=(1, 1),
                                               strides=(1, 1),
                                               name='conv2d_3')
            evaluation_conv = tl.layers.BatchNormLayer(evaluation_conv,
                                                       act=tf.nn.relu,
                                                       is_train=is_train,
                                                       name='bn_2')
            evaluation_conv_flat = tl.layers.FlattenLayer(evaluation_conv,
                                                          name='flatten_layer_2')
            evaluation_fc1 = tl.layers.DenseLayer(evaluation_conv_flat,
                                                  n_units=256,
                                                  act=tf.nn.relu,
                                                  name='dense_layer_2')
            # This dense layer is named 'flatten_layer_3'. Do not rename it:
            # the layer name is part of the variable and tensor names, so
            # existing checkpoints and exported node names depend on it.
            evaluation_fc2 = tl.layers.DenseLayer(evaluation_fc1,
                                                  n_units=1,
                                                  act=tf.nn.tanh,
                                                  name='flatten_layer_3')

            return action_fc.outputs, evaluation_fc2.outputs

    def residual_block(self, incoming, out_channels, is_train, nb_block=1):
        """Stack ``nb_block`` simple ResNet blocks on top of ``incoming``.

        Each block is conv3x3 -> BN+ReLU -> conv3x3 -> BN, added element-wise
        to the block input and followed by a ReLU.

        Args:
            incoming: TensorLayer layer feeding the first block.
            out_channels: number of filters of every convolution in the stack.
            is_train: forwarded to every batch-norm layer.
            nb_block: number of blocks to stack.

        Returns:
            The TensorLayer layer produced by the last block (``incoming``
            itself when ``nb_block`` is 0).
        """
        resnet = incoming
        for i in range(nb_block):
            # Keep the block input for the skip connection.
            identity = resnet
            # in_channels = incoming.outputs.get_shape().as_list()[-1]
            resnet = tl.layers.Conv2d(resnet, n_filter=out_channels, filter_size=(3, 3), strides=(1, 1),
                                      padding='SAME', name='resnet_conv2d_' + str(i) + '_1')
            resnet = tl.layers.BatchNormLayer(resnet, is_train=is_train, act=tf.nn.relu,
                                              name='resnet_bn_' + str(i) + '_1')
            resnet = tl.layers.Conv2d(resnet, n_filter=out_channels, filter_size=(3, 3), strides=(1, 1),
                                      padding='SAME', name='resnet_conv2d_' + str(i) + '_2')
            resnet = tl.layers.BatchNormLayer(resnet, is_train=is_train,
                                              name='resnet_bn_' + str(i) + '_2')

            resnet = tl.layers.ElementwiseLayer([resnet, identity], combine_fn=tf.add,
                                                name='elementwise_layer_' + str(i))
            resnet = MyActLayer(resnet, act=tf.nn.relu, name='activation_layer_' + str(i))

        return resnet
    
class MyActLayer(Layer):
    """Layer that only applies an activation function to the previous layer's output.

    Args:
        prev_layer: the layer whose ``outputs`` are fed to ``act``.
        act: activation applied to the inputs (identity by default).
        name: layer name, also used as the variable-scope name.
    """

    def __init__(self, prev_layer=None, act=tf.identity, name='activation_layer'):
        Layer.__init__(self, prev_layer=prev_layer, name=name)

        # The activation is applied inside a scope named after the layer.
        self.inputs = prev_layer.outputs
        with tf.variable_scope(name):
            self.outputs = act(self.inputs)

        # Copy the previous layer's bookkeeping, then register this layer's
        # output tensor.
        self.all_layers = list(prev_layer.all_layers)
        self.all_params = list(prev_layer.all_params)
        self.all_drop = dict(prev_layer.all_drop)
        self.all_layers.append(self.outputs)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Run the conversion: the constructor builds the network, restores
# 'model/best_policy.model' and saves it again as 'best_policy.ckpt'.
# NOTE(review): the graph-freezing cell reads "model/model.ckpt" - confirm the
# checkpoint written here is moved/renamed to that location before freezing.
worker = PolicyValueNet('model/best_policy.model', 'best_policy.ckpt')

[TL] InputLayer  model/input: (?, 15, 15, 9)
[TL] ZeroPad2d   model/zeropad2d: padding: 2
[TL] Conv2d model/conv2d_1: n_filter: 64 filter_size: (1, 1) strides: (1, 1) pad: SAME act: No Activation
Instructions for updating:
Colocations handled automatically by placer.
[TL] Conv2d model/resnet_conv2d_0_1: n_filter: 64 filter_size: (3, 3) strides: (1, 1) pad: SAME act: No Activation
[TL] BatchNormLayer model/resnet_bn_0_1: decay: 0.900000 epsilon: 0.000010 act: relu is_train: True
[TL] Conv2d model/resnet_conv2d_0_2: n_filter: 64 filter_size: (3, 3) strides: (1, 1) pad: SAME act: No Activation
[TL] BatchNormLayer model/resnet_bn_0_2: decay: 0.900000 epsilon: 0.000010 act: No Activation is_train: True
[TL] ElementwiseLayer model/elementwise_layer_0: size: (?, 19, 19, 64) fn: add
[TL] Conv2d model/resnet_conv2d_1_1: n_filter: 64 filter_size: (3, 3) strides: (1, 1) pad: SAME act: No Activation
[TL] BatchNormLayer model/resnet_bn_1_1: decay: 0.900000 epsilon: 0.000010 act: relu is_train: True

In [None]:
class Agent():
    """Inference wrapper around a frozen policy-value graph stored as a .pb file.

    The GraphDef is imported into a private ``tf.Graph`` and a session bound to
    that graph is kept open for the lifetime of the agent; call ``close()``
    when the agent is no longer needed.

    Args:
        pb_path: path of the serialized GraphDef to load.
    """

    def __init__(self, pb_path):
        # Read and parse the serialized graph; the file is closed before the
        # graph is built.
        output_graph_def = tf.GraphDef()
        with open(pb_path, "rb") as f:
            output_graph_def.ParseFromString(f.read())

        # Import into a dedicated graph so nothing leaks into the default one,
        # and bind the session to that graph explicitly.
        graph = tf.Graph()
        with graph.as_default():
            tf.import_graph_def(output_graph_def, name="")
        self.sess = tf.Session(graph=graph)

        # No variable initialisation is needed: import_graph_def only imports
        # nodes, so this graph has no registered global variables and running
        # an initializer would be a no-op.
        self.action_fc = graph.get_tensor_by_name(
            "model_1/dense_layer_1/LogSoftmax:0")
        self.evaluation_fc2 = graph.get_tensor_by_name(
            "model_1/flatten_layer_3/Tanh:0")
        self._input = graph.get_tensor_by_name("fky_input:0")

    def calculate(self, state_batch):
        """Evaluate a batch of states.

        Args:
            state_batch: value fed to the ``fky_input`` tensor.

        Returns:
            Tuple ``(out, b)``: the values of the ``LogSoftmax`` and ``Tanh``
            output tensors for the batch.
        """
        # The session always runs its own graph, so no graph context is
        # needed here (creating a new tf.Graph per call was pure overhead).
        out, b = self.sess.run([self.action_fc, self.evaluation_fc2],
                               feed_dict={self._input: state_batch})
        return out, b

    def close(self):
        """Release the resources held by the underlying session."""
        self.sess.close()
