In [1]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every expression statement in a cell, not only the last
# one (later cells rely on this to show the result of several calls at once).
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
import tensorflow as tf

## An example of a simple computational graph in TensorFlow
![computational graph](computational_graph.png)

f(**x**) = **x**W + **b**  :

In [3]:
# Input placeholder: float32 rows of 784 features; the batch size is left open (None).
x = tf.placeholder(dtype=tf.float32, shape=[None, 784], name='x')

In [4]:
# Weight matrix W (784 x 10), initialised with uniform random values between -1 and 1.
W = tf.Variable(
    initial_value=tf.random_uniform(shape=[784, 10], minval=-1, maxval=1),
    name="W")

In [5]:
# Bias vector b with 10 entries, initialised to zeros.
b = tf.Variable(initial_value=tf.zeros(shape=[10]), name='biases')

In [6]:
# f(x) = xW + b: multiply the input batch by W, then add the bias b.
output = tf.matmul(x,W) + b

In [7]:
# Op that initialises all global variables currently defined in the graph.
# tf.global_variables_initializer() is the supported replacement for the
# deprecated tf.initialize_all_variables().
init_op = tf.global_variables_initializer()

In [8]:
# Open a session and run the initialiser op so the variables receive their
# starting values.
sess = tf.Session()
sess.run(fetches=init_op)

## Navigating Variable Scopes and Sharing Variables

构建复杂模型时，通常需要重用（reuse）和共享（share）大量的变量。

In [9]:
def my_network(input):
    """Build a three-layer affine network (784 -> 100 -> 50 -> 10) on ``input``.

    Every call creates six new ``tf.Variable`` objects (one weight matrix and
    one bias vector per layer), prints the names assigned to them, and returns
    the output of the last layer.

    Args:
        input: tensor that is multiplied by a 784 x 100 weight matrix, so its
            last dimension must be 784.

    Returns:
        The tensor produced by the third layer.
    """
    # (weight shape, weight name, bias name) for each layer, in order.
    # NOTE: 'baises_3' is misspelt; it is kept verbatim because the name is
    # part of the graph and of the printed output.
    layer_specs = (
        ([784, 100], 'W_1', 'biases_1'),
        ([100, 50], 'W_2', 'biases_2'),
        ([50, 10], 'W_3', 'baises_3'),
    )

    weights = []
    biases = []
    activation = input
    for weight_shape, weight_name, bias_name in layer_specs:
        weight = tf.Variable(tf.random_uniform(weight_shape, -1, 1), name=weight_name)
        bias = tf.Variable(tf.zeros([weight_shape[1]]), name=bias_name)
        # Each layer consumes the output of the previous one.
        activation = tf.matmul(activation, weight) + bias
        weights.append(weight)
        biases.append(bias)

    # Report the variable names; names are separated by four spaces.
    print("Names of weight parameters\n")
    print('    '.join(weight.name for weight in weights))
    print("\nNames of bias parameters\n")
    print('    '.join(bias.name for bias in biases))

    return activation

这是一个 3 层的网络（network），共包含 6 个变量（每层一个权重矩阵和一个偏置向量）。

如果想多次使用这个network，就将它封装成function，以多次调用

In [10]:
# Build the network twice, on two placeholders that differ only in their names.
i_1 = tf.placeholder(dtype=tf.float32, shape=[1000, 784], name='input_1')
my_network(i_1)

i_2 = tf.placeholder(dtype=tf.float32, shape=[1000, 784], name='input_2')
my_network(i_2)

# i_1 and i_2 are identical apart from their names, yet the two calls give different results:

Names of weight parameters

W_1:0    W_2:0    W_3:0

Names of bias parameters

biases_1:0    biases_2:0    baises_3:0


<tf.Tensor 'add_3:0' shape=(1000, 10) dtype=float32>

Names of weight parameters

W_1_1:0    W_2_1:0    W_3_1:0

Names of bias parameters

biases_1_1:0    biases_2_1:0    baises_3_1:0


<tf.Tensor 'add_6:0' shape=(1000, 10) dtype=float32>

仔细观察会发现，第二次调用并没有使用第一次调用所创建的变量，而是又新建了一套变量（W_1_1、W_2_1 等）。

在很多情况下，我们不想创建副本，而是重用模型及其变量。

因此，在本例中，我们不应直接使用 `tf.Variable`，而应采用一种更高级的命名方案（naming scheme），即利用 TensorFlow 的变量作用域（variable scoping）机制。

## 用 tf's variable scoping 重写my_network
涉及2个functions:

* `tf.get_variable(<name>, <shape>, <initializer>)`

* `tf.variable_scope(<scope_name>)`


In [11]:
def layer(input, weight_shape, bias_shape):
    """Apply one affine layer, ``matmul(input, W) + b``, using scoped variables.

    The weight ``"W"`` and bias ``'b'`` are obtained through
    ``tf.get_variable`` rather than created with ``tf.Variable``.

    Args:
        input: tensor to be multiplied by the weight matrix.
        weight_shape: shape of the weight variable ``W``.
        bias_shape: shape of the bias variable ``b``.

    Returns:
        The tensor ``tf.matmul(input, W) + b``.
    """
    # Weights start as uniform random values between -1 and 1.
    weights = tf.get_variable(
        "W",
        weight_shape,
        initializer=tf.random_uniform_initializer(minval=-1, maxval=1))
    # Biases start at the constant 0.
    biases = tf.get_variable(
        'b',
        bias_shape,
        initializer=tf.constant_initializer(value=0))
    return tf.matmul(input, weights) + biases

def my_network(input):
    """Stack three affine layers (784 -> 100 -> 50 -> 10), one variable scope each.

    Each layer is built by ``layer`` inside its own ``tf.variable_scope``
    ('layer_1', 'layer_2', 'layer_3').

    Args:
        input: tensor fed to the first layer.

    Returns:
        The tensor produced by the third layer.
    """
    # (scope name, weight shape, bias shape) for each layer, applied in order.
    layer_specs = (
        ('layer_1', [784, 100], [100]),
        ('layer_2', [100, 50], [50]),
        ('layer_3', [50, 10], [10]),
    )

    activation = input
    for scope_name, weight_shape, bias_shape in layer_specs:
        with tf.variable_scope(scope_name):
            activation = layer(activation, weight_shape, bias_shape)

    return activation

In [12]:
# First call: creates the variables layer_1/W, layer_1/b, ... via tf.get_variable.
i_1 = tf.placeholder(dtype=tf.float32, shape=[1000, 784], name='input_1')
my_network(i_1)

i_2 = tf.placeholder(dtype=tf.float32, shape=[1000, 784], name='input_2')
# my_network(i_2)  # would raise "Variable layer_1/W already exists", because sharing is not allowed by default

<tf.Tensor 'layer_3/add:0' shape=(1000, 10) dtype=float32>

### 如果我们想在变量范围内启用共享:

In [13]:
with tf.variable_scope("shared_variables") as scope:
    # First call creates the variables inside the "shared_variables" scope.
    i_1 = tf.placeholder(dtype=tf.float32, shape=[1000, 784], name='i_1')
    my_network(i_1)
    # Turn on reuse for this scope so the second call shares the variables
    # created above instead of raising "already exists".
    scope.reuse_variables()
    i_2 = tf.placeholder(dtype=tf.float32, shape=[1000, 784], name='i_2')
    my_network(i_2)

<tf.Tensor 'shared_variables/layer_3/add:0' shape=(1000, 10) dtype=float32>

<tf.Tensor 'shared_variables/layer_3_1/add:0' shape=(1000, 10) dtype=float32>

这让我们保持模块化，同时允许变量共享。这也使命名方案更干净。

## GPU VS CPU

若要查看各个操作实际运行在哪个设备（device）上，可在创建 session 时设置参数 `log_device_placement=True`。

In [14]:
# Session configured with log_device_placement=True to report device placement.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

In [26]:
# Pin the ops below to a specific device; an error is raised if that device is
# unavailable.
with tf.device('/gpu:2'):
    a = tf.constant(value=[1.0, 2.0, 3.0, 4.0], shape=[2, 2], name='a')
    b = tf.constant(value=[1.0, 2.0], shape=[2, 1], name='b')
    c = tf.matmul(a, b)  # matrix multiplication

sess = tf.Session(config=tf.ConfigProto(
    # Pass this to fall back to another available device when the requested
    # one does not exist (e.g. '/gpu:0' on this machine).
    allow_soft_placement=True,
    log_device_placement=True))
sess.run(c)

array([[  5.],
       [ 11.]], dtype=float32)

### Building multi-GPU models in a tower-like fashion
![multi-GPU](multi-GPU_model.png)

In [29]:
# Tower-style multi-GPU computation: build the same matmul on each GPU, then
# add the per-device results together on the CPU.
c = []
for device in ['/gpu:0', '/gpu:1']:
    with tf.device(device):
        A = tf.constant([1.0, 2.0, 3.0, 4.0], shape=[2, 2], name='Matrix_A')
        b = tf.constant([1.0, 2.0], shape=[2, 1], name='b')
        # Multiply the matrix created on this device (`A`), not the tensor `a`
        # defined in an earlier cell under a different device.
        c.append(tf.matmul(A, b))

with tf.device('/cpu:0'):
    # NOTE: this name shadows the built-in `sum` for the rest of the notebook.
    sum = tf.add_n(c)

# Soft placement lets the graph run even when a requested GPU is missing;
# device placement is logged.
sess = tf.Session(config=tf.ConfigProto(
                        allow_soft_placement=True,
                        log_device_placement=True))
sess.run(sum)
    

array([[ 10.],
       [ 22.]], dtype=float32)