In [1]:
# Common imports
import numpy as np
import os

def reset_graph(seed=42):
    """Replace the default TensorFlow graph and re-seed TensorFlow and NumPy.

    Called at the top of a cell so that the notebook's output stays stable
    across runs.

    Args:
        seed: integer handed to both the TensorFlow and the NumPy seeders.
    """
    tf.reset_default_graph()
    # The graph must be reset first: the TensorFlow seed is set on the new graph.
    for seed_fn in (tf.set_random_seed, np.random.seed):
        seed_fn(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Font sizes for axis labels and tick labels on every figure in this notebook.
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Creating and running a graph

In [4]:
import tensorflow as tf

In [5]:
# Start from an empty default graph with fixed seeds.
reset_graph()
# These lines only declare graph nodes; no value is computed until a session runs them.
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
# Symbolic expression x^2 * y + y + 2.
f = x*x*y + y +2

In [6]:
f

<tf.Tensor 'add_1:0' shape=() dtype=int32>

In [7]:
# Build the op that initialises every variable, then evaluate f inside a session.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    result = sess.run(f)

In [8]:
result

42

# Managing graphs

In [9]:
reset_graph()

# A node created outside any explicit graph context is added to the default graph.
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

In [10]:
# Create a separate graph and make it the default only inside the `with` block.
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
    
# x2 was created while `graph` was the default, so it belongs to `graph`.
x2.graph is graph

True

In [11]:
x2.graph is tf.get_default_graph()

False

# Linear Regression

## using the Normal Equation

In [15]:
from sklearn.datasets import fetch_california_housing

# Load the California housing dataset. This call must run: every later cell
# reads `housing.data` / `housing.target`, and leaving it commented out makes
# the notebook fail with NameError on a fresh kernel.
housing = fetch_california_housing()

# m = number of samples, n = number of features.
m, n = housing.data.shape
# Prepend a column of ones so the first component of theta acts as the bias term.
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [16]:
reset_graph()

# Design matrix (with bias column) and targets as constant float32 tensors.
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
XT = tf.transpose(X)

# Normal Equation: theta = (X^T X)^-1 X^T y
theta = tf.matmul(
    tf.matmul(
        tf.matrix_inverse(tf.matmul(XT, X)),
        XT,
    ),
    y,
)

with tf.Session() as sess:
    theta_value = sess.run(theta)

In [17]:
theta_value

array([[-3.7185181e+01],
       [ 4.3633747e-01],
       [ 9.3952334e-03],
       [-1.0711310e-01],
       [ 6.4479220e-01],
       [-4.0338000e-06],
       [-3.7813708e-03],
       [-4.2348403e-01],
       [-4.3721911e-01]], dtype=float32)

compare with pure numpy

In [18]:
# Same Normal Equation, evaluated directly with NumPy on the unscaled data.
X, y = housing_data_plus_bias, housing.target.reshape(-1, 1)
theta_numpy = np.linalg.inv(np.dot(X.T, X)).dot(X.T).dot(y)

theta_numpy

array([[-3.69419202e+01],
       [ 4.36693293e-01],
       [ 9.43577803e-03],
       [-1.07322041e-01],
       [ 6.45065694e-01],
       [-3.97638942e-06],
       [-3.78654266e-03],
       [-4.21314378e-01],
       [-4.34513755e-01]])

compare with Scikit-Learn

In [19]:
from sklearn.linear_model import LinearRegression

# fit() returns the estimator, so construction and fitting are chained here.
lin_reg = LinearRegression().fit(housing.data, housing.target.reshape(-1, 1))

# Stack the intercept above the coefficients to match the layout of theta.
np.r_[lin_reg.intercept_.reshape(-1, 1), lin_reg.coef_.T]



array([[-3.69419202e+01],
       [ 4.36693293e-01],
       [ 9.43577803e-03],
       [-1.07322041e-01],
       [ 6.45065694e-01],
       [-3.97638942e-06],
       [-3.78654265e-03],
       [-4.21314378e-01],
       [-4.34513755e-01]])

## Using Batch Gradient Descent

**NOTE:** Gradient Descent requires scaling the feature vectors first. We could do this using TF, but let's just use Scikit-Learn for now.

In [20]:
from sklearn.preprocessing import StandardScaler
# Standardise the features, then prepend the bias column of ones.
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.hstack([np.ones((m, 1)), scaled_housing_data])

### Manually computing the gradients

In [21]:
reset_graph()

n_epochs = 1000
learning_rate = 0.01
m, n = housing.data.shape

# ---- graph construction ----
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0, seed=42), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
# Hand-derived gradient of the MSE with respect to theta: (2/m) * X^T (X theta - y).
gradients = 2/m * tf.matmul(tf.transpose(X), error)
# One batch gradient-descent step: theta <- theta - learning_rate * gradients.
training_op = tf.assign(theta, theta - learning_rate*gradients)

init = tf.global_variables_initializer()

# ---- execution ----
with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        # Report the loss every 100 epochs, measured before that epoch's update.
        if not epoch % 100:
            print('Epoch', epoch, 'MSE=', sess.run(mse))
        training_op.eval()

    best_theta = sess.run(theta)

Epoch 0 MSE= 9.161543
Epoch 100 MSE= 0.71450067
Epoch 200 MSE= 0.5667049
Epoch 300 MSE= 0.5555719
Epoch 400 MSE= 0.5488112
Epoch 500 MSE= 0.5436362
Epoch 600 MSE= 0.5396294
Epoch 700 MSE= 0.53650916
Epoch 800 MSE= 0.5340678
Epoch 900 MSE= 0.5321474


In [22]:
best_theta

array([[ 2.0685523 ],
       [ 0.8874027 ],
       [ 0.14401656],
       [-0.34770885],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.66145283],
       [-0.6375278 ]], dtype=float32)

### Using autodiff

Same as above except for the gradients = ... line:

In [23]:
reset_graph()

# Hyperparameters for batch gradient descent.
n_epochs = 1000
learning_rate = 0.01
# m = number of samples, n = number of features.
m, n = housing.data.shape

# Scaled inputs (with bias column) and targets as constant float32 tensors.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
# One weight per feature plus the bias, drawn uniformly from [-1, 1) with a fixed op seed.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
# Mean squared error over all samples.
mse = tf.reduce_mean(tf.square(error), name="mse")

In [25]:
# Let TensorFlow derive d(mse)/d(theta); tf.gradients returns a list, and [0] picks the entry for theta.
gradients = tf.gradients(mse, [theta])[0]

In [26]:
# One gradient-descent step using the autodiff gradient.
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        # Report the loss every 100 epochs, before applying that epoch's update.
        if not epoch % 100:
            print("Epoch", epoch, "MSE =", sess.run(mse))
        training_op.eval()

    best_theta = sess.run(theta)

print("Best theta:", best_theta, sep="\n")

Epoch 0 MSE = 9.161543
Epoch 100 MSE = 0.7145006
Epoch 200 MSE = 0.566705
Epoch 300 MSE = 0.5555719
Epoch 400 MSE = 0.5488112
Epoch 500 MSE = 0.5436362
Epoch 600 MSE = 0.5396294
Epoch 700 MSE = 0.5365092
Epoch 800 MSE = 0.5340678
Epoch 900 MSE = 0.5321474
Best theta:
[[ 2.0685525 ]
 [ 0.8874027 ]
 [ 0.14401658]
 [-0.34770882]
 [ 0.36178368]
 [ 0.00393811]
 [-0.04269556]
 [-0.6614528 ]
 [-0.6375277 ]]


### Using a GradientDescentOptimizer

In [30]:
reset_graph()

# Hyperparameters for batch gradient descent.
n_epochs = 1000
learning_rate = 0.01
# m = number of samples, n = number of features.
m, n = housing.data.shape

# Same linear model and MSE loss as in the autodiff section; only the way the
# update step is built (next cell) differs.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

In [33]:
# The optimizer builds both the gradient computation and the update of theta;
# minimize() returns the op that performs one training step.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

In [34]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        # Report the loss every 100 epochs, before applying that epoch's update.
        if not epoch % 100:
            print("Epoch", epoch, "MSE =", sess.run(mse))
        training_op.run()

    best_theta = sess.run(theta)

print("Best theta:", best_theta, sep="\n")

Epoch 0 MSE = 9.161543
Epoch 100 MSE = 0.7145006
Epoch 200 MSE = 0.566705
Epoch 300 MSE = 0.5555719
Epoch 400 MSE = 0.5488112
Epoch 500 MSE = 0.5436362
Epoch 600 MSE = 0.5396294
Epoch 700 MSE = 0.5365092
Epoch 800 MSE = 0.5340678
Epoch 900 MSE = 0.5321474
Best theta:
[[ 2.0685525 ]
 [ 0.8874027 ]
 [ 0.14401658]
 [-0.34770882]
 [ 0.36178368]
 [ 0.00393811]
 [-0.04269556]
 [-0.6614528 ]
 [-0.6375277 ]]


### Using a momentum optimizer

In [35]:
reset_graph()

n_epochs = 1000
learning_rate = 0.01
m, n = housing.data.shape

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

In [38]:
# Same learning rate as plain gradient descent, with a momentum coefficient of 0.9.
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

In [39]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        # Report the loss every 100 epochs, before applying that epoch's update.
        if not epoch % 100:
            print("Epoch", epoch, "MSE =", sess.run(mse))
        training_op.run()

    best_theta = sess.run(theta)

print("Best theta:", best_theta, sep="\n")

Epoch 0 MSE = 9.161543
Epoch 100 MSE = 0.53056407
Epoch 200 MSE = 0.5250113
Epoch 300 MSE = 0.52441096
Epoch 400 MSE = 0.52433306
Epoch 500 MSE = 0.52432257
Epoch 600 MSE = 0.52432126
Epoch 700 MSE = 0.52432096
Epoch 800 MSE = 0.52432096
Epoch 900 MSE = 0.52432096
Best theta:
[[ 2.068558  ]
 [ 0.8296286 ]
 [ 0.11875337]
 [-0.26554456]
 [ 0.3057109 ]
 [-0.00450251]
 [-0.03932662]
 [-0.89986444]
 [-0.87052065]]


MSEが他の勾配法と比べ小さくなっているので，少しばかり性能が上がった

# Feeding data to the training algorithm

## Placeholder nodes

In [41]:
reset_graph()

# Placeholder with an unconstrained number of rows and exactly three columns.
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5

# The value of A is supplied at evaluation time through feed_dict.
with tf.Session() as sess:
    B_val_1 = sess.run(B, feed_dict={A: [[1, 2, 3]]})
    B_val_2 = sess.run(B, feed_dict={A: [[4, 5, 6], [7, 8, 9]]})

In [45]:
print(B_val_1)

[[6. 7. 8.]]


In [44]:
print(B_val_2)

[[ 9. 10. 11.]
 [12. 13. 14.]]


## Mini-batch Gradient Descent

In [46]:
# Hyperparameters. Note: n_epochs is reassigned to 10 in a later cell, before the mini-batch loop runs.
n_epochs = 1000
learning_rate = 0.01

In [47]:
reset_graph()

# Placeholders instead of constants, so a different mini-batch can be fed at every step.
# `n` is the feature count unpacked from housing.data.shape in an earlier cell; +1 is the bias column.
X = tf.placeholder(tf.float32, shape=(None, n+1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

In [48]:
# One weight per feature plus the bias, drawn uniformly from [-1, 1) with a fixed op seed.
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0, seed=42), name='theta')

# Op name spelled 'predictions', matching every other section of the notebook
# (it was misspelled 'predicitions', which is what would appear in TensorBoard).
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
# Mean squared error over whatever batch is fed through X and y.
mse = tf.reduce_mean(tf.square(error), name='mse')

# minimize() returns the op that performs one gradient-descent step on theta.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

In [49]:
# Overrides the earlier n_epochs = 1000: ten epochs of mini-batches with 100 rows each.
n_epochs = 10
batch_size = 100
# Number of mini-batches per epoch, rounded up.
n_batches = int(np.ceil(m/batch_size))

init = tf.global_variables_initializer()

In [52]:
def fetch_batch(epoch, batch_index, batch_size):
    """Return a reproducible random mini-batch of the scaled housing data.

    The parameter was previously misspelled ``bach_index``, so the body
    silently read the *global* ``batch_index`` instead of its own argument.
    Positional callers are unaffected by the rename.

    Args:
        epoch: index of the current epoch.
        batch_index: index of the mini-batch within the epoch.
        batch_size: number of rows to draw.

    Returns:
        Tuple ``(X_batch, y_batch)``: the selected rows of
        ``scaled_housing_data_plus_bias`` and the matching targets as a column.
    """
    # Seed from the global step so the same (epoch, batch_index) always yields the same rows.
    np.random.seed(epoch * n_batches + batch_index)
    # Row indices are drawn independently, so a row may appear more than once in a batch.
    indices = np.random.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[indices]
    y_batch = housing.target.reshape(-1, 1)[indices]
    return X_batch, y_batch

In [69]:
with tf.Session() as sess:
    init.run()

    # Each epoch runs n_batches update steps, each on a freshly drawn mini-batch.
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            training_op.run(feed_dict={X: X_batch, y: y_batch})

    best_theta = sess.run(theta)

In [56]:
best_theta

array([[ 2.0703337 ],
       [ 0.8637145 ],
       [ 0.12255151],
       [-0.31211874],
       [ 0.38510373],
       [ 0.00434168],
       [-0.01232954],
       [-0.83376896],
       [-0.8030471 ]], dtype=float32)

# Saving and restoring a model

In [72]:
# Back to full-batch training: 1000 epochs at the same learning rate.
n_epochs = 1000
learning_rate = 0.01

# m = number of samples, n = number of features.
m, n = housing.data.shape

In [74]:
reset_graph()

# Full (scaled) dataset as constants: this section trains with batch gradient descent.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
# Named 'theta' so it can later be looked up as 'theta:0' after importing the meta graph.
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1, seed=42), name='theta')

y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

# minimize() returns the op that performs one gradient-descent step on theta.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

In [83]:
init = tf.global_variables_initializer()
###
# Created after the graph is built, so that theta already exists when the Saver is constructed.
saver = tf.train.Saver()
# If you want a saver that saves and restores theta under a different name, such as "weights":
# saver = tf.train.Saver({"weights": theta})
###

In [80]:
# Make sure the checkpoint directory exists before the first save; saving into
# a missing parent directory can fail.
os.makedirs('./model', exist_ok=True)

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print('Epoch', epoch, 'MSE=', mse.eval())
            # Periodic checkpoint, written to the same prefix every time.
            save_path = saver.save(sess, './model/my_model.ckpt')
        sess.run(training_op)
    
    best_theta = theta.eval()
    # Final checkpoint; this is the one the restore cells load.
    save_path = saver.save(sess, './model/my_model_final.ckpt')

Epoch 0 MSE= 9.161543
Epoch 100 MSE= 0.7145006
Epoch 200 MSE= 0.566705
Epoch 300 MSE= 0.5555719
Epoch 400 MSE= 0.5488112
Epoch 500 MSE= 0.5436362
Epoch 600 MSE= 0.5396294
Epoch 700 MSE= 0.5365092
Epoch 800 MSE= 0.5340678
Epoch 900 MSE= 0.5321474


In [79]:
best_theta

array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.6614528 ],
       [-0.6375277 ]], dtype=float32)

restore

In [81]:
# No initializer is run here: theta gets its value from the checkpoint.
with tf.Session() as sess:
    saver.restore(sess, './model/my_model_final.ckpt')
    best_theta_restored = sess.run(theta)

INFO:tensorflow:Restoring parameters from ./model/my_model_final.ckpt


In [82]:
np.allclose(best_theta, best_theta_restored)

True

By default the saver also saves the graph structure itself in a second file with the extension .meta. You can use the function tf.train.import_meta_graph() to restore the graph structure. This function loads the graph into the default graph and returns a Saver that can then be used to restore the graph state (i.e., the variable values):

In [86]:
# Start from an empty graph so that everything below comes from the .meta file.
reset_graph()

# import_meta_graph() rebuilds the saved graph and hands back a Saver for it.
saver = tf.train.import_meta_graph('./model/my_model_final.ckpt.meta')
# The Python variable `theta` is gone with the old graph; look the tensor up by name.
theta = tf.get_default_graph().get_tensor_by_name('theta:0')

with tf.Session() as sess:
    saver.restore(sess, './model/my_model_final.ckpt')
    best_theta_restored = sess.run(theta)

INFO:tensorflow:Restoring parameters from ./model/my_model_final.ckpt


In [87]:
np.allclose(best_theta, best_theta_restored)

True

# Visualizing the graph

## Using TensorBoard

In [88]:
reset_graph()

from datetime import datetime

# Timestamped run directory (UTC, to the second) so each run's logs go to a separate folder.
now = datetime.utcnow().strftime('%Y%m%d%H%M%S')
root_logdir = 'tf_logs'
logdir = '{}/run-{}'.format(root_logdir, now)

In [91]:
# Hyperparameters. Note: n_epochs is reassigned to 10 in a later cell, before training runs.
n_epochs = 1000
learning_rate = 0.01

# Placeholders, so a different mini-batch can be fed at every step.
X = tf.placeholder(tf.float32, shape=(None, n+1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

# Linear model, MSE loss and a gradient-descent training step.
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0, seed=42), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

In [92]:
# Op that initialises every variable defined so far (here: theta).
init = tf.global_variables_initializer()

In [95]:
mse_summary = tf.summary.scalar('MSE', mse) # evaluates to a TensorBoard-compatible binary log string
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph()) # destination directory for the event files, and the graph to write

In [97]:
# Overrides the earlier n_epochs = 1000: ten epochs of mini-batches with 100 rows each.
n_epochs = 10
batch_size = 100
# Number of mini-batches per epoch, rounded up.
n_batches = int(np.ceil(m/batch_size))

In [98]:
with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            # Log the MSE of every tenth mini-batch, measured before that batch's update.
            if not batch_index % 10:
                summary_str = sess.run(mse_summary, feed_dict={X: X_batch, y: y_batch})
                # Global step index, used as the summary's step value.
                step = batch_index + epoch * n_batches
                file_writer.add_summary(summary_str, step)
            training_op.run(feed_dict={X: X_batch, y: y_batch})

    best_theta = sess.run(theta)

In [99]:
file_writer.close()

In [100]:
best_theta

array([[ 2.0703337 ],
       [ 0.8637145 ],
       [ 0.12255151],
       [-0.31211874],
       [ 0.38510373],
       [ 0.00434168],
       [-0.01232954],
       [-0.83376896],
       [-0.8030471 ]], dtype=float32)

TensorBoard in browser

In [None]:
!tensorboard --logdir tf_logs

## Name scopes

In [101]:
reset_graph()

# Timestamped run directory (UTC, to the second) so each run's logs go to a separate folder.
now = datetime.utcnow().strftime('%Y%m%d%H%M%S')
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

# Hyperparameters. Note: n_epochs is reassigned to 10 in the training cell below.
n_epochs = 1000
learning_rate = 0.01

# Inputs, parameters and predictions; the loss is defined in the next cell inside a name scope.
X = tf.placeholder(tf.float32, shape=(None, n+1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0, seed=42), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')

In [102]:
# Ops created inside this block get the 'loss/' name prefix (e.g. 'loss/sub', 'loss/mse').
with tf.name_scope('loss') as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name='mse')

In [103]:
# minimize() returns the op that performs one gradient-descent step on theta.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

In [106]:
init = tf.global_variables_initializer()

# Scalar summary of the loss, and a writer that records the graph and later the summaries under logdir.
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [107]:
# Ten epochs of mini-batches with 100 rows each; n_batches is the per-epoch count, rounded up.
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m/batch_size))

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            # Log the MSE of every tenth mini-batch, measured before that batch's update.
            if not batch_index % 10:
                summary_str = sess.run(mse_summary, feed_dict={X: X_batch, y: y_batch})
                # Global step index, used as the summary's step value.
                step = batch_index + epoch * n_batches
                file_writer.add_summary(summary_str, step)
            training_op.run(feed_dict={X: X_batch, y: y_batch})

    best_theta = sess.run(theta)

In [108]:
# Write out any pending summaries, then close the event file writer.
file_writer.flush()
file_writer.close()
# Display the learned parameters.
best_theta

array([[ 2.0703337 ],
       [ 0.8637145 ],
       [ 0.12255151],
       [-0.31211874],
       [ 0.38510373],
       [ 0.00434168],
       [-0.01232954],
       [-0.83376896],
       [-0.8030471 ]], dtype=float32)

In [110]:
print(error.op.name)

loss/sub


In [111]:
print(mse.op.name)

loss/mse


## Modularity

ugly flat code:

In [112]:
reset_graph()

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')

# Two ReLU units written out by hand: every line is duplicated with a 1/2 suffix.
w1 = tf.Variable(tf.random_normal((n_features, 1)), name='weights1')
w2 = tf.Variable(tf.random_normal((n_features, 1)), name='weights2')
b1 = tf.Variable(0.0, name='bias1')
b2 = tf.Variable(0.0, name='bias2')

# Linear part of each unit: X.w + b.
z1 = tf.add(tf.matmul(X, w1), b1, name='z1')
z2 = tf.add(tf.matmul(X, w2), b2, name='z2')

# ReLU: element-wise max(z, 0).
relu1 = tf.maximum(z1, 0., name='relu1')
relu2 = tf.maximum(z2, 0., name='relu2')

output = tf.add(relu1, relu2, name='output')

much better code: with a function

In [118]:
reset_graph()

def relu(X):
    """Add one ReLU unit fed by X to the default graph and return its output tensor.

    Each call creates its own `weights` and `bias` variables; the weight
    column has one row per column of X.
    """
    n_inputs = int(X.get_shape()[1])
    weights = tf.Variable(tf.random_normal((n_inputs, 1)), name='weights')
    bias = tf.Variable(0.0, name='bias')
    linear = tf.add(tf.matmul(X, weights), bias, name='z')
    return tf.maximum(linear, 0., name='relu')

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')
# Five independent units on the same input, summed element-wise.
relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name='output')

In [119]:
# Write the current graph for TensorBoard, then close the writer so the event
# file is flushed and its handle released (the writer was previously left open).
file_writer = tf.summary.FileWriter('tf_logs/relu1', tf.get_default_graph())
file_writer.close()

even better code: name scopes

In [120]:
reset_graph()

def relu(X):
    """Add one ReLU unit fed by X, with all of its ops grouped under a 'relu' name scope.

    Each call creates its own `weights` and `bias` variables and returns the
    unit's output tensor.
    """
    with tf.name_scope('relu'):
        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name='weights')
        bias = tf.Variable(0.0, name='bias')
        linear = tf.add(tf.matmul(X, weights), bias, name='z')
        return tf.maximum(linear, 0., name='max')

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')
# Five independent units on the same input, summed element-wise.
relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name='output')

In [122]:
# Write the name-scoped graph for TensorBoard and close the writer straight away.
file_writer = tf.summary.FileWriter('tf_logs/relu2', tf.get_default_graph())
file_writer.close()

## Sharing Variables

関数の引数に渡す最もシンプルな方法

In [123]:
reset_graph()

def relu(X, threshold):
    """Add one unit computing max(X.w + b, threshold) and return its output tensor.

    The threshold is supplied by the caller, so several units can share one
    variable; `weights` and `bias` are created anew on every call.
    """
    with tf.name_scope('relu'):
        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name='weights')
        bias = tf.Variable(0.0, name='bias')
        linear = tf.add(tf.matmul(X, weights), bias, name='z')
        return tf.maximum(linear, threshold, name='max')

# A single threshold variable, passed to every unit.
threshold = tf.Variable(0.0, name='threshold')
X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')
relus = [relu(X, threshold) for _ in range(5)]
output = tf.add_n(relus, name='output')

関数に属性を持たせる

In [126]:
reset_graph()

def relu(X):
    """Add one unit computing max(X.w + b, relu.threshold) and return its output tensor.

    The shared threshold variable is stored as an attribute on this function:
    it is created on the first call and reused by every later call.
    """
    with tf.name_scope('relu'):
        # First call only: create the shared threshold and cache it on the function object.
        if not hasattr(relu, 'threshold'):
            relu.threshold = tf.Variable(0.0, name='threshold')

        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name='weights')
        bias = tf.Variable(0.0, name='bias')
        linear = tf.add(tf.matmul(X, weights), bias, name='z')
        return tf.maximum(linear, relu.threshold, name='max')

X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')
relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name='output')

get_variable()によって，  
まだ作成されていない時は作成，作成されていれば再利用ができる．

In [129]:
reset_graph()

def relu(X):
    """Add one unit computing max(X.w + b, threshold), reusing the existing 'relu/threshold'.

    The scope is opened with reuse=True, so get_variable() only looks the
    threshold up; it must have been created before the first call.
    """
    with tf.variable_scope('relu', reuse=True):
        threshold = tf.get_variable('threshold')

        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name='weights')
        bias = tf.Variable(0.0, name='bias')
        linear = tf.add(tf.matmul(X, weights), bias, name='z')
        return tf.maximum(linear, threshold, name='max')

X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')
# Create the shared scalar threshold once, outside the function, initialised to 0.0.
with tf.variable_scope('relu'):
    threshold = tf.get_variable('threshold', shape=(), initializer=tf.constant_initializer(0.0))

relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name='output')

relu()に属する「threshold」を外部で定義するのも微妙なので，  
下記のように最初だけ変数を作成し，その後は再利用を明示する「reuse」を指定する（再利用時は明示する必要あり）

In [131]:
reset_graph()

def relu(X):
    """Add one unit computing max(X.w + b, threshold) and return its output tensor.

    The threshold is obtained with get_variable() in whatever variable scope
    the caller has opened; the caller's `reuse` flag decides whether it is
    created or looked up.
    """
    threshold = tf.get_variable('threshold', shape=(), initializer=tf.constant_initializer(0.0))

    n_inputs = int(X.get_shape()[1])
    weights = tf.Variable(tf.random_normal((n_inputs, 1)), name='weights')
    bias = tf.Variable(0.0, name='bias')
    linear = tf.add(tf.matmul(X, weights), bias, name='z')
    return tf.maximum(linear, threshold, name='max')

X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')
relus = []
for relu_index in range(5):
    # The first unit creates the threshold (reuse=False); every later unit reuses it.
    with tf.variable_scope('relu', reuse=(relu_index > 0)) as scope:
        relus.append(relu(X))
output = tf.add_n(relus, name='output')