# 基礎から実践 TensorFlow 重み共有
TensorFlow での重み共有と tf.variable_scope() を動かしてみるノートブックです。
[公式：Variables](https://www.tensorflow.org/programmers_guide/variables)


In [1]:
import tensorflow as tf

In [2]:
def show_variables():
    """Print the name of each variable returned by tf.global_variables(), one per line."""
    names = (variable.name for variable in tf.global_variables())
    print('\n'.join(names))

# 基本： Variable, variable_scope, get_variable

## Variable と get_variable

In [3]:
# Variable / get_variable used without any variable_scope.
# Each trailing "-> name" note is the variable name the call produces,
# as confirmed by the show_variables() output.
with tf.Session(graph=tf.Graph()) as sess:
    v_a = tf.Variable(tf.random_uniform(shape=[2, 3]), name='variable')  # -> variable:0
    v_b = tf.Variable(tf.random_uniform(shape=[2, 3]), name='variable')  # -> variable_1:0
    v_c = tf.get_variable('variable', shape=[2, 3])  # -> variable_2:0
    # Uncommenting the next line (a second get_variable with the same name) raises an error.
    # v_d = tf.get_variable('variable', shape=[2, 3])  # -> ERROR
    show_variables()

variable:0
variable_1:0
variable_2:0


## variable_scope で変数を階層化・共有する

In [4]:
# Create a variable inside scope 'hoge', then re-enter the same scope with
# reuse=True to fetch that variable instead of creating a new one.
with tf.Session(graph=tf.Graph()) as sess:
    with tf.variable_scope('hoge'):
        v_a = tf.get_variable('variable', shape=[2, 3])  # -> hoge/variable:0
        # v_b = tf.get_variable('variable', shape=[2, 3])  #-> ERROR
        
    with tf.variable_scope('hoge', reuse=True):  # share the variables of 'hoge'
        v_c = tf.get_variable('variable', shape=[2, 3])  # -> hoge/variable:0 (shared!)
        # Asking for a name that was never created fails under reuse=True.
        # v_d = tf.get_variable('variable_new', shape=[2, 3])  # -> ERROR
        show_variables()

hoge/variable:0


## variable_scope の reuse option

In [5]:
# Compare the three values of the `reuse` option of tf.variable_scope.
with tf.Session(graph=tf.Graph()) as sess:
    with tf.variable_scope('reuse_none', reuse=None):  # no reuse; this is the default
        v_none_a = tf.get_variable('variable', shape=[2, 3])  # -> reuse_none/variable:0
        # v_none_b = tf.get_variable('variable', shape=[2, 3])  # -> ERROR

    with tf.variable_scope('reuse_true', reuse=None):  # reuse=True cannot create variables, so create it first with reuse=None
        v_true_a = tf.get_variable('variable', shape=[2, 3])  # -> reuse_true/variable:0
    with tf.variable_scope('reuse_true', reuse=True):
        v_true_a = tf.get_variable('variable', shape=[2, 3])  # -> reuse_true/variable:0
        # v_true_b = tf.get_variable('variable_b', shape=[2, 3])  # -> ERROR

    # NOTE(review): the names v_none_a / v_none_b below are the same Python names
    # used in the 'reuse_none' block above, so they are rebound here.
    with tf.variable_scope('auto_reuse', reuse=tf.AUTO_REUSE):  # create if missing, reuse if present; convenient but dangerous
        v_none_a = tf.get_variable('variable', shape=[2, 3])  # -> auto_reuse/variable:0
        v_none_b = tf.get_variable('variable', shape=[2, 3])  # -> auto_reuse/variable:0

    show_variables()

reuse_none/variable:0
reuse_true/variable:0
auto_reuse/variable:0


In [6]:
# Switch an already-open scope to reuse mode with scope.reuse_variables()
# instead of re-entering it with reuse=True.
with tf.Session(graph=tf.Graph()) as sess:
    with tf.variable_scope('reuse_true') as scope:
        # reuse=None (default): the variable is created here
        v_true_a = tf.get_variable('variable', shape=[2, 3])  # -> reuse_true/variable:0

        scope.reuse_variables()
        # reuse=True from here on: the existing variable is fetched
        v_true_a = tf.get_variable('variable', shape=[2, 3])  # -> reuse_true/variable:0
        # v_true_b = tf.get_variable('variable_b', shape=[2, 3])  # -> ERROR
    show_variables()

reuse_true/variable:0


## reuse option の継承

In [7]:
def show_reuse(scope):
    """Print one line of the form ``<scope.name>: <scope.reuse>``."""
    line = '{}: {}'.format(scope.name, scope.reuse)
    print(line)
    
# Open every outer/inner combination of reuse=None / True / AUTO_REUSE and
# print the effective `reuse` value of each scope. Only scopes are opened
# here; no variables are created.

# Outer reuse=None: each inner scope reports exactly what it was given
# (None is shown as False).
with tf.variable_scope('reuse_none', reuse=None) as outer_scope:
    show_reuse(outer_scope)
    with tf.variable_scope('reuse_none', reuse=None) as inner_scope:
        show_reuse(inner_scope)
    with tf.variable_scope('reuse_true', reuse=True) as inner_scope:
        show_reuse(inner_scope)
    with tf.variable_scope('auto_reuse', reuse=tf.AUTO_REUSE) as inner_scope:
        show_reuse(inner_scope)

# Outer reuse=True: an inner reuse=None inherits True from the outer scope.
with tf.variable_scope('reuse_true', reuse=True) as outer_scope:
    show_reuse(outer_scope)
    with tf.variable_scope('reuse_none', reuse=None) as inner_scope:
        show_reuse(inner_scope)
    with tf.variable_scope('reuse_true', reuse=True) as inner_scope:
        show_reuse(inner_scope)
    with tf.variable_scope('auto_reuse', reuse=tf.AUTO_REUSE) as inner_scope:
        show_reuse(inner_scope)

# Outer AUTO_REUSE: an inner reuse=None inherits AUTO_REUSE from the outer scope.
with tf.variable_scope('auto_reuse', reuse=tf.AUTO_REUSE) as outer_scope:
    show_reuse(outer_scope)
    with tf.variable_scope('reuse_none', reuse=None) as inner_scope:
        show_reuse(inner_scope)
    with tf.variable_scope('reuse_true', reuse=True) as inner_scope:
        show_reuse(inner_scope)
    with tf.variable_scope('auto_reuse', reuse=tf.AUTO_REUSE) as inner_scope:
        show_reuse(inner_scope)

reuse_none: False
reuse_none/reuse_none: False
reuse_none/reuse_true: True
reuse_none/auto_reuse: _ReuseMode.AUTO_REUSE
reuse_true: True
reuse_true/reuse_none: True
reuse_true/reuse_true: True
reuse_true/auto_reuse: _ReuseMode.AUTO_REUSE
auto_reuse: _ReuseMode.AUTO_REUSE
auto_reuse/reuse_none: _ReuseMode.AUTO_REUSE
auto_reuse/reuse_true: True
auto_reuse/auto_reuse: _ReuseMode.AUTO_REUSE


# 実践重み共有

In [8]:
def has_same_outputs(tensor1, tensor2, sess):
    """Return whether the two tensors evaluate to the same values.

    Runs tf.global_variables_initializer() in ``sess`` first, then fetches
    both tensors in a single ``sess.run`` call and compares their
    ``tolist()`` forms for equality.
    """
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    first, second = sess.run([tensor1, tensor2])
    return first.tolist() == second.tolist()

## 単純なフィードフォワードネットワーク

In [9]:
# Factor the network construction into a reusable function
def feed_forward(inputs):
    """Apply two stacked dense layers to ``inputs`` and return the result.

    The first layer is named 'dense_a' (20 units) and the second 'dense_b'
    (30 units).
    """
    hidden = tf.layers.dense(inputs, units=20, name='dense_a')
    return tf.layers.dense(hidden, units=30, name='dense_b')

In [10]:
# Example: build two independent networks.
# Each feed_forward() call runs under its own variable scope, so each network
# gets its own kernel/bias variables (see the variable list printed below).
with tf.Session(graph=tf.Graph()) as sess:
    inputs = tf.ones(shape=[2, 3])
    with tf.variable_scope('feed_forward_x'):
        outputs_x = feed_forward(inputs)
    with tf.variable_scope('feed_forward_y'):
        outputs_y = feed_forward(inputs)
    print('shared: {}'.format(has_same_outputs(outputs_x, outputs_y, sess)))
    show_variables()

shared: False
feed_forward_x/dense_a/kernel:0
feed_forward_x/dense_a/bias:0
feed_forward_x/dense_b/kernel:0
feed_forward_x/dense_b/bias:0
feed_forward_y/dense_a/kernel:0
feed_forward_y/dense_a/bias:0
feed_forward_y/dense_b/kernel:0
feed_forward_y/dense_b/bias:0


In [11]:
# Example: build two networks that share weights, i.e. effectively one network.
with tf.Session(graph=tf.Graph()) as sess:
    inputs = tf.ones(shape=[2, 3])
    with tf.variable_scope('feed_forward_shared') as scope:
        outputs_x = feed_forward(inputs)  # first call creates the variables
        scope.reuse_variables()  # switch the scope to reuse mode
        outputs_y = feed_forward(inputs)  # second call reuses the same variables
    print('shared: {}'.format(has_same_outputs(outputs_x, outputs_y, sess)))
    show_variables()

shared: True
feed_forward_shared/dense_a/kernel:0
feed_forward_shared/dense_a/bias:0
feed_forward_shared/dense_b/kernel:0
feed_forward_shared/dense_b/bias:0


## Seq2Seq のデコーダを学習時と生成時で分ける

In [12]:
import numpy as np
# Hyperparameters and shared names for the Seq2Seq example below.
vocab_size = 3000  # rows of the embedding matrix and units of the decoder output layer
embedding_dim = 256  # width of each embedding vector
hidden_dim = 256  # number of units in every GRUCell
batch_size = 2  # number of sequences in the dummy inputs and of start tokens
beam_width = 2  # beam width passed to tile_batch and BeamSearchDecoder
max_len = 8  # length of the dummy sequences and maximum_iterations for generation
decoder_scope = 'decoder'  # scope passed to dynamic_decode by both decoders
cell_name = 'cell'  # name given to every GRUCell
out_layer_name = 'out_layer'  # name given to the decoder output Dense layer

def create_encoder(inputs, inputs_length, embedding):
    """Build the GRU encoder and return its final state.

    ``inputs`` are looked up in ``embedding`` and fed to a GRUCell of
    ``hidden_dim`` units through tf.nn.dynamic_rnn under the scope 'encoder';
    ``inputs_length`` is passed as the sequence lengths.
    """
    embedded_inputs = tf.nn.embedding_lookup(embedding, inputs)
    gru_cell = tf.nn.rnn_cell.GRUCell(hidden_dim, name=cell_name)
    rnn_kwargs = dict(
        cell=gru_cell,
        inputs=embedded_inputs,
        sequence_length=inputs_length,
        dtype=tf.float32,
        scope='encoder',
    )
    # Only the final state is needed; the per-step outputs are discarded.
    _, last_state = tf.nn.dynamic_rnn(**rnn_kwargs)
    return last_state

def create_trainer_decoder(thought_vector, embedding, inputs, inputs_length):
    """Build the training-time decoder and return its ``rnn_output``.

    The decoder inputs are embedded with ``embedding`` and fed through a
    TrainingHelper; ``thought_vector`` is the initial state of the
    BasicDecoder, whose outputs go through a bias-free Dense layer of
    ``vocab_size`` units. Decoding runs under the scope ``decoder_scope``.
    """
    gru_cell = tf.nn.rnn_cell.GRUCell(hidden_dim, name=cell_name)
    projection = tf.layers.Dense(vocab_size, use_bias=False, name=out_layer_name)
    embedded_inputs = tf.nn.embedding_lookup(embedding, inputs)
    training_helper = tf.contrib.seq2seq.TrainingHelper(
        inputs=embedded_inputs, sequence_length=inputs_length)
    basic_decoder = tf.contrib.seq2seq.BasicDecoder(
        gru_cell, training_helper, thought_vector, output_layer=projection)
    # dynamic_decode returns three values; only the first (the outputs) is used.
    final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        basic_decoder, swap_memory=True, scope=decoder_scope)
    return final_outputs.rnn_output

def create_generation_decoder(thought_vector, embedding):
    """Build the generation-time beam-search decoder and return ``predicted_ids``.

    ``thought_vector`` is tiled ``beam_width`` times and used as the initial
    state of a BeamSearchDecoder that starts from token id 1 and stops at
    token id 2. Decoding runs under the scope ``decoder_scope`` for at most
    ``max_len`` iterations.
    """
    gru_cell = tf.nn.rnn_cell.GRUCell(hidden_dim, name=cell_name)
    projection = tf.layers.Dense(vocab_size, use_bias=False, name=out_layer_name)
    bos_tokens = tf.ones([batch_size], tf.int32)  # BOS == 1
    eos_token = 2  # EOS == 2
    beam_initial_state = tf.contrib.seq2seq.tile_batch(thought_vector, multiplier=beam_width)

    beam_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
        cell=gru_cell,
        embedding=embedding,
        start_tokens=bos_tokens,
        end_token=eos_token,
        initial_state=beam_initial_state,
        beam_width=beam_width,
        output_layer=projection,
    )

    # dynamic_decode returns three values; only the first (the outputs) is used.
    final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder=beam_decoder, maximum_iterations=max_len, scope=decoder_scope)
    return final_outputs.predicted_ids

# Build the encoder and both decoders in a fresh graph, run them once on
# dummy inputs, and list the variables to show that the two decoders share
# one set of weights.
with tf.Session(graph=tf.Graph()) as sess:
    # Dummy inputs: every token id is 1 and every sequence length is max_len.
    encoder_inputs = tf.ones(shape=[batch_size, max_len], dtype=tf.int32)
    encoder_inputs_length = tf.ones(shape=[batch_size], dtype=tf.int32) * max_len
    decoder_inputs = tf.ones(shape=[batch_size, max_len], dtype=tf.int32)
    decoder_inputs_length = tf.ones(shape=[batch_size], dtype=tf.int32) * max_len

    # A single embedding matrix is passed to the encoder and to both decoders.
    embedding = tf.Variable(tf.random_uniform([vocab_size, embedding_dim], -1.0, 1.0), dtype=tf.float32, name='embedding')
    thought_vector = create_encoder(encoder_inputs, encoder_inputs_length, embedding)  # encoder's final state
    
    with tf.variable_scope('decoder') as scope:
        # The first call creates the decoder variables ...
        train_outputs = create_trainer_decoder(
            thought_vector,
            embedding,
            decoder_inputs,
            decoder_inputs_length,
        )
        # ... then the scope is switched to reuse so the generation decoder shares them.
        scope.reuse_variables()
        generation_outputs = create_generation_decoder(
            thought_vector,
            embedding,
        )
    
    sess.run(tf.global_variables_initializer())
    train_result, generation_result = sess.run([train_outputs, generation_outputs])
    train_ids = np.argmax(train_result, axis=-1)  # index of the largest value along the last axis
    generation_ids = generation_result[:,:,0]  # index 0 on the last axis; presumably the top beam, TODO confirm
    print(train_ids)
    print(generation_ids)
    show_variables()

[[ 302  143 1909 1909 1909 1909 1909 1909]
 [ 302  143 1909 1909 1909 1909 1909 1909]]
[[1120 2573 2918 2455 2455 2316 2316 2316]
 [1120 2573 2918 2455 2455 2316 2316 2316]]
embedding:0
encoder/cell/gates/kernel:0
encoder/cell/gates/bias:0
encoder/cell/candidate/kernel:0
encoder/cell/candidate/bias:0
decoder/decoder/cell/gates/kernel:0
decoder/decoder/cell/gates/bias:0
decoder/decoder/cell/candidate/kernel:0
decoder/decoder/cell/candidate/bias:0
decoder/decoder/out_layer/kernel:0
