In [1]:
import warnings
# Ignore every warning category from here on (presumably to keep cell output uncluttered).
warnings.filterwarnings("ignore")

# Neural Network and Deep Learning

## Up and Running with TensorFlow

<img src="./pic/a-simple-computation-graph.png" height="200" width="300" align="left"></img>

In [2]:
import tensorflow as tf

In [3]:
# Define the graph: two integer variables and the expression f = x*x*y + y + 2.
# This cell only creates graph nodes; values are obtained later through a session.
x = tf.Variable(3,name="x")
y = tf.Variable(4,name="y")
f = x*x*y + y + 2

In [4]:
# Run the graph: open a session, initialize both variables, then evaluate f.
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)
# A session opened without `with` is not closed automatically; release its resources.
sess.close()

42


In [5]:
with tf.Session() as sess:
    # Inside the `with` block `sess` is the default session, so the shortcuts below work.
    x.initializer.run() # = tf.get_default_session().run(x.initializer)
    y.initializer.run()
    result = f.eval() # = tf.get_default_session().run(f)

In [8]:
init = tf.global_variables_initializer() # create one node that runs every variable initializer
with tf.Session() as sess:
    init.run() # run the initialization node
    result = f.eval() # = tf.get_default_session().run(f)

InteractiveSession

In [9]:
# With an InteractiveSession, .run()/.eval() are used below without a `with` block;
# the session is then closed by hand at the end of the cell.
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
sess.close()

42


### Managing Graphs

In [11]:
# Any node that gets created is automatically added to the default graph.
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

###### 多图管理

In [13]:
# Nodes created inside `graph.as_default()` are added to `graph` rather than to the default graph.
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
print(x2.graph is graph)
print(x2.graph is tf.get_default_graph())

True
False


In [14]:
### Note: in Jupyter, running the same cells several times can leave the default graph
### full of duplicate nodes.
### A better remedy than restarting the kernel is to reset the default graph with tf.reset_default_graph().

In [17]:
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3
# TF follows the dependencies: it evaluates w and x before y, and then, for the
# second evaluation, w and x again before z -- so w and x are computed twice here.
# After a run() all node values are dropped, except variables, which the session
# keeps alive across graph runs; a variable's lifetime starts when it is
# initialized and ends when the session is closed.
with tf.Session() as sess:
    # Two separate evaluations (the original printed y_val/z_val before they were defined).
    print(y.eval())
    print(z.eval())

# More efficient: fetch y and z in a single graph run.
with tf.Session() as sess:
    y_val, z_val = sess.run([y,z])
    print(y_val)
    print(z_val)

10
15
10
15


###### tips:在单个TensorFlow进程里，多个session之间不共享状态；即使它们使用同一个图，每个session也都持有自己的一份变量副本。在分布式TensorFlow中，变量状态存储在服务器（servers）上而不是session中，因此多个session可以共享同一个变量。

### tf线性回归
###### 正规方程

In [18]:
from sklearn.datasets import fetch_california_housing

# Fetch the California housing dataset (the recorded output shows it being downloaded on first use).
housing = fetch_california_housing()
# m and n are the two dimensions of the feature matrix: rows and columns.
m, n = housing.data.shape

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /Users/zhen.huaz/scikit_learn_data


In [22]:
# numpy is required for np.c_ / np.ones here and in later cells, but no earlier cell imports it.
import numpy as np

# Prepend a column of ones (the bias input) to the feature matrix.
housing_data_plus_bias = np.c_[np.ones((m,1)),housing.data]

In [29]:
# Normal equation, built as graph ops: theta = (X^T X)^-1 X^T y.
X = tf.constant(housing_data_plus_bias,dtype=tf.float32,name="X")
y = tf.constant(housing.target.reshape(-1,1),dtype=tf.float32,name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT,X)),XT),y)

with tf.Session() as sess:
    # Evaluate once and print that result instead of computing theta a second time.
    theta_value = theta.eval()
    print(theta_value)

[[ -3.74651413e+01]
 [  4.35734153e-01]
 [  9.33829229e-03]
 [ -1.06622010e-01]
 [  6.44106984e-01]
 [ -4.25131839e-06]
 [ -3.77322501e-03]
 [ -4.26648885e-01]
 [ -4.40514028e-01]]


###### 梯度下降法

In [41]:
from sklearn.preprocessing import StandardScaler
# Standardize the features, then prepend the bias column of ones.
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
# Sanity checks: per-column means, per-row means, overall mean, and shape.
print(scaled_housing_data_plus_bias.mean(axis=0))
print(scaled_housing_data_plus_bias.mean(axis=1))
print(scaled_housing_data_plus_bias.mean())
print(scaled_housing_data_plus_bias.shape)

[  1.00000000e+00   6.60969987e-17   5.50808322e-18   6.60969987e-17
  -1.06030602e-16  -1.10161664e-17   3.44255201e-18  -1.07958431e-15
  -8.52651283e-15]
[ 0.38915536  0.36424355  0.5116157  ..., -0.06612179 -0.06360587
  0.01359031]
0.111111111111
(20640, 9)


In [45]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias,dtype=tf.float32,name="X")
y = tf.constant(housing.target.reshape(-1,1),dtype=tf.float32,name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1],-1.0,1.0,seed=42),name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error),name="mse")
# Hand-written gradient of the MSE with respect to theta: (2/m) * X^T (X theta - y).
gradients = 2/m * tf.matmul(tf.transpose(X),error)
# One batch gradient descent step: theta <- theta - learning_rate * gradients.
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        # Report the loss every 100 epochs.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)
    # Keep the trained values, not the tf.Variable handle: the variable's value
    # is only available while this session is open.
    best_theta = theta.eval()
    r = best_theta
r

Epoch 0 MSE =  2.75443
Epoch 100 MSE =  0.632222
Epoch 200 MSE =  0.57278
Epoch 300 MSE =  0.558501
Epoch 400 MSE =  0.549069
Epoch 500 MSE =  0.542288
Epoch 600 MSE =  0.537379
Epoch 700 MSE =  0.533822
Epoch 800 MSE =  0.531243
Epoch 900 MSE =  0.529371


array([[  2.06855226e+00],
       [  7.74078071e-01],
       [  1.31192386e-01],
       [ -1.17845111e-01],
       [  1.64778173e-01],
       [  7.44084362e-04],
       [ -3.91945168e-02],
       [ -8.61356556e-01],
       [ -8.23479652e-01]], dtype=float32)

###### using autodiff
tf提供了自动高效的计算梯度的方法

In [55]:
# Scratch cell: draw 32 samples with np.random.randn and display them.
# The result is not assigned, and no later cell visible here uses it.
np.random.randn(32)

array([ 0.58434588, -0.8888061 ,  0.98923267,  0.80572489, -0.21271437,
        0.07939033,  0.26506805,  0.5226235 , -0.13174742,  0.98886718,
        1.15235939,  1.30175708, -0.48651095, -0.74862838,  0.74884594,
       -0.7613145 ,  0.38705451,  1.06862263, -0.19900075,  0.25828872,
        0.16220417,  0.90693196, -1.17076901,  0.86608974,  0.46626157,
       -0.40035605, -0.01150335,  0.53446835,  0.68800041, -0.93991925,
       -1.87809245, -1.38079079])

In [56]:
# Let TensorFlow derive the gradient of mse with respect to theta instead of using the
# hand-written `gradients` formula; tf.gradients is given a list, so [0] picks theta's entry.
autogradients = tf.gradients(mse,[theta])[0]
# Same update rule as before, driven by the automatically derived gradient.
auto_training_op = tf.assign(theta, theta - learning_rate * autogradients)

In [50]:
# Train with the autodiff-based update op.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        # Report the loss every 100 epochs.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(auto_training_op)
    # Keep the trained values, not the tf.Variable handle: the variable's value
    # is only available while this session is open.
    best_theta = theta.eval()
    r = best_theta
r

Epoch 0 MSE =  2.75443
Epoch 100 MSE =  0.632222
Epoch 200 MSE =  0.57278
Epoch 300 MSE =  0.558501
Epoch 400 MSE =  0.549069
Epoch 500 MSE =  0.542288
Epoch 600 MSE =  0.537379
Epoch 700 MSE =  0.533822
Epoch 800 MSE =  0.531243
Epoch 900 MSE =  0.529371


array([[  2.06855249e+00],
       [  7.74078071e-01],
       [  1.31192386e-01],
       [ -1.17845066e-01],
       [  1.64778143e-01],
       [  7.44078017e-04],
       [ -3.91945094e-02],
       [ -8.61356676e-01],
       [ -8.23479772e-01]], dtype=float32)

###### using optimizer
可以直接替代前面的gradients和training_op

In [65]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
# Alternative that converges faster than the optimizer above:
#optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=0.9)
# NOTE(review): `init` was created before this optimizer; if the momentum variant adds
# variables of its own, `init` probably has to be re-created after minimize() -- confirm.
optimizer_training_op = optimizer.minimize(mse)
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        # Report the loss every 100 epochs.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(optimizer_training_op)
    # Keep the trained values, not the tf.Variable handle: the variable's value
    # is only available while this session is open.
    best_theta = theta.eval()
    r = best_theta
r

Epoch 0 MSE =  2.75443
Epoch 100 MSE =  0.632222
Epoch 200 MSE =  0.57278
Epoch 300 MSE =  0.558501
Epoch 400 MSE =  0.549069
Epoch 500 MSE =  0.542288
Epoch 600 MSE =  0.537379
Epoch 700 MSE =  0.533822
Epoch 800 MSE =  0.531243
Epoch 900 MSE =  0.529371


array([[  2.06855249e+00],
       [  7.74078071e-01],
       [  1.31192386e-01],
       [ -1.17845066e-01],
       [  1.64778143e-01],
       [  7.44078017e-04],
       [ -3.91945094e-02],
       [ -8.61356676e-01],
       [ -8.23479772e-01]], dtype=float32)

###### feed data

In [62]:
# A is a placeholder with 3 columns and an unspecified number of rows (None);
# its value is supplied through feed_dict when B is evaluated.
A = tf.placeholder(tf.float32,shape=(None,3))
B = A + 5

with tf.Session() as sess:
    # Same graph, fed once with a single row and once with two rows.
    B_val_1 = B.eval(feed_dict={A:[[1,2,3]]})
    B_val_2 = B.eval(feed_dict={A:[[4,5,6],[7,8,9]]})
    
print(B_val_1)
print(B_val_2)

[[ 6.  7.  8.]]
[[  9.  10.  11.]
 [ 12.  13.  14.]]


In [63]:
# X = tf.placeholder(tf.float32,shape=(None,n + 1),name="X")
# y = tf.placeholder(tf.float32,shape=(None,1),name="y")

# batch_size = 100
# n_batches = int(np.ceil(m / batch_size))

# def fetch_batch(epoch, batch_index, batch_size):
#     [...] #load the data from disk
#     return X_batch, y_batch

# with tf.Session() as sess:
#     sess.run(init)
    
#     for epoch in range(n_epochs):
#         for batch_index in range(n_batches):
#             X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
#             sess.run(training_op,feed_dict={X:X_batch, y: y_batch})
#     best_theta = theta.eval()

###### Saving and Restoring Models

In [67]:
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # Checkpoint every 100 epochs, always to the same path.
            save_path = saver.save(sess, "/tmp/my_model.ckpt")
        sess.run(optimizer_training_op)  
    best_theta = theta.eval()
    # Final checkpoint after training, written to a separate path.
    save_path = saver.save(sess, "/tmp/my_model_final.ckpt")

In [68]:
# Restoring a checkpoint can take the place of running the initializer.
with tf.Session() as sess:
    saver.restore(sess,"/tmp/my_model_final.ckpt")
    # Continue training from the restored parameters (no sess.run(init) in this cell).
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(optimizer_training_op)  

INFO:tensorflow:Restoring parameters from /tmp/my_model_final.ckpt
Epoch 0 MSE =  0.528011
Epoch 100 MSE =  0.527022
Epoch 200 MSE =  0.526302
Epoch 300 MSE =  0.525778
Epoch 400 MSE =  0.525394
Epoch 500 MSE =  0.525114
Epoch 600 MSE =  0.524909
Epoch 700 MSE =  0.524758
Epoch 800 MSE =  0.524647
Epoch 900 MSE =  0.524566


In [69]:
# saver = tf.train.Saver({"weights":theta})  # would save only theta, under the name "weights"

###### 可视化图和训练曲线的DashBoard

In [74]:
# Start from an empty default graph so the ops below are not mixed with earlier cells' nodes.
tf.reset_default_graph()
from datetime import datetime

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias,dtype=tf.float32,name="X")
y = tf.constant(housing.target.reshape(-1,1),dtype=tf.float32,name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1],-1.0,1.0,seed=42),name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error),name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X),error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

# Timestamped log directory, so each execution of this cell writes to its own run folder.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "/tmp/tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

# Node that evaluates the MSE and serializes it as a TensorBoard-compatible summary (tf.summary).
mse_summary = tf.summary.scalar("MSE", mse)
# FileWriter that writes the log files: the first argument is the directory for the
# summaries, the second is the graph to visualize.
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # Log the MSE summary every 100 epochs, using the epoch number as the step.
            summary_str = mse_summary.eval()
            step = epoch 
            file_writer.add_summary(summary_str,step)
        sess.run(training_op)
    file_writer.close()

###### Name scopes

In [79]:
# With complex models the graph gets cluttered; a name scope groups related nodes.
with tf.name_scope("loss") as scope:
    error = y_pred - y
    c = tf.constant(3,name="c")
    d = 2 - c
    mse = tf.reduce_mean(tf.square(error),name="mse")
# Ops created inside the scope carry the scope name as a prefix, so it has to be
# included when referring to them by name. The recorded output shows "loss_4/...",
# presumably because the cell had been executed several times.
print(error.op.name)
print(mse.op.name)
print(c.op.name)
print(d.op.name)

loss_4/sub
loss_4/mse
loss_4/c
loss_4/sub_1


###### Modularity

ReLU公式: $h_{w,b}(X) = \max(X \cdot w + b, 0)$

In [80]:
#code-1
# Two ReLU units written out by hand: every op and variable is duplicated.
n_features = 3
X = tf.placeholder(tf.float32, shape = (None, n_features), name = "X")
# tf.random_normal draws random values from a normal distribution.
#random_normal(shape,mean=0.0,stddev=1.0,dtype=tf.float32,seed=None,name=None)

# tf.random_uniform draws random values from a uniform distribution.
# By default the values fall in the half-open interval [0, 1). minval is the lower bound (default 0); maxval is the upper bound: it defaults to 1 for floating-point dtypes and must be given explicitly for integer dtypes.
w1 = tf.Variable(tf.random_normal((n_features, 1)), name = "weights1")
w2 = tf.Variable(tf.random_normal((n_features, 1)), name = "weights2")
b1 = tf.Variable(0.0, name = "bias1")
b2 = tf.Variable(0.0, name = "bias2")

z1 = tf.add(tf.matmul(X, w1), b1, name = "z1")
z2 = tf.add(tf.matmul(X, w2), b2, name = "z2")
relu1 = tf.maximum(z1, 0 , name = "relu1")
relu2 = tf.maximum(z2, 0 , name = "relu2")

output = tf.add(relu1, relu2 , name="output")

#code-2
# TF makes repeated names unique by appending a counter: the second "weights" becomes "weights_1".
def relu(X):
    """Build one ReLU unit on X: max(X @ weights + bias, 0).

    Every call creates a new weight variable of shape (X's second dimension, 1)
    and a new scalar bias variable.
    """
    n_inputs = int(X.get_shape()[1])
    weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
    bias = tf.Variable(0.0, name="bias")
    linear = tf.matmul(X, weights)
    pre_activation = tf.add(linear, bias, name="z")
    return tf.maximum(pre_activation, 0, name="relu")

# Five units built from the same input, summed with add_n.
relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name="output")


In [82]:
# Keep a handle on the current default graph so the next cell can inspect it.
graph = tf.get_default_graph()

In [95]:
# Inspect the graph's operations (not the last expression of the cell, so not displayed).
graph.get_operations()
# Inspect the variables in the graph's 'variables' collection.
graph.get_collection('variables')

[<tf.Variable 'theta:0' shape=(9, 1) dtype=float32_ref>,
 <tf.Variable 'weights1:0' shape=(3, 1) dtype=float32_ref>,
 <tf.Variable 'weights2:0' shape=(3, 1) dtype=float32_ref>,
 <tf.Variable 'bias1:0' shape=() dtype=float32_ref>,
 <tf.Variable 'bias2:0' shape=() dtype=float32_ref>,
 <tf.Variable 'weights:0' shape=(3, 1) dtype=float32_ref>,
 <tf.Variable 'bias:0' shape=() dtype=float32_ref>,
 <tf.Variable 'weights_1:0' shape=(3, 1) dtype=float32_ref>,
 <tf.Variable 'bias_1:0' shape=() dtype=float32_ref>,
 <tf.Variable 'weights_2:0' shape=(3, 1) dtype=float32_ref>,
 <tf.Variable 'bias_2:0' shape=() dtype=float32_ref>,
 <tf.Variable 'weights_3:0' shape=(3, 1) dtype=float32_ref>,
 <tf.Variable 'bias_3:0' shape=() dtype=float32_ref>,
 <tf.Variable 'weights_4:0' shape=(3, 1) dtype=float32_ref>,
 <tf.Variable 'bias_4:0' shape=() dtype=float32_ref>]

In [125]:
output_summary = tf.summary.scalar("output", output)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # Passing sess.graph makes the writer record the graph for TensorBoard.
    summary_writer = tf.summary.FileWriter("/tmp/tf-log-1", sess.graph)
    # Close the writer so the event file is flushed to disk and its handle released,
    # as the TensorBoard logging cell earlier in this notebook does.
    summary_writer.close()

###### Sharing Variables

In [127]:
#code-1
# Problem: once there are many shared variables, passing each one as an argument gets unwieldy.
def relu(X,threshold):
    """Build max(X @ weights + bias, threshold) inside the "relu" name scope.

    `threshold` is supplied by the caller, so the same value can be passed to every unit.
    """
    with tf.name_scope("relu"):
        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
        bias = tf.Variable(0.0, name="bias")
        linear = tf.matmul(X, weights)
        pre_activation = tf.add(linear, bias, name="z")
        return tf.maximum(pre_activation, threshold, name="relu")

# Presumably the variable intended to be passed to every relu(X, threshold) call.
threshold = tf.Variable(0.0, name="threshold")
#code-2
# Keep the shared variable as an attribute of the function itself.
def relu(X):
    """Build max(X @ weights + bias, relu.threshold) inside the "relu" name scope.

    The threshold variable is created on the first call and stored as
    `relu.threshold`; later calls reuse that same variable.
    """
    with tf.name_scope("relu"):
        if not hasattr(relu, "threshold"):
            relu.threshold = tf.Variable(0.0, name="threshold")
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        # Use the attribute created above; the original passed the unrelated
        # global `threshold`, leaving relu.threshold unused.
        return tf.maximum(z, relu.threshold, name="relu")
        
#code-3
# Share through a TF variable scope; `reuse` controls whether existing variables are reused.
def relu(X):
    """Build max(X @ weights + bias, threshold) using the shared "relu/threshold" variable.

    The scope is opened with reuse=True and "threshold" is only looked up via
    get_variable, so it has to be created in the "relu" variable scope beforehand.
    Weights and bias are still new variables on every call.
    """
    # Enter the scope in reuse mode; once reuse is True it cannot be switched back to False.
    with tf.variable_scope("relu", reuse=True):
        shared_threshold = tf.get_variable("threshold")
        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
        bias = tf.Variable(0.0, name="bias")
        linear = tf.matmul(X, weights)
        pre_activation = tf.add(linear, bias, name="z")
        return tf.maximum(pre_activation, shared_threshold, name="relu")
      
X = tf.placeholder(tf.float32, shape = (None, n_features), name = "X")
# The shared variable is created here, before relu() is called: relu() opens the
# "relu" scope with reuse=True and only looks "threshold" up.
with tf.variable_scope("relu"): #create the Variable
    threshold =  tf.get_variable("threshold",shape=(),initializer=tf.constant_initializer(0.0))
# Five units that all use the same threshold variable, summed with add_n.
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name = "output")
   