## Installation

In [2]:
import tensorflow as tf

  return f(*args, **kwds)


In [3]:
print (tf.__version__)

1.10.1


## Creating your first graph and running it in a session

In [None]:
# create tensor graph
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
f = x*x*y + y + 2

In [None]:
# create running session
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)
sess.close()

In [None]:
# 使用with声明
with tf.Session() as sess:
    # 这里自动设置了下面代码执行的上下文，即tf.Session()，而且这个上下文会自动释放
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
print(result)

In [None]:
# 全局初始化
init = tf.global_variables_initializer() # prepare an init node, 这里只是获取一个初始化函数，并不会跑代码
with tf.Session() as sess:
    init.run() # 初始化所有变量
    result = f.eval()
print(result)

In [None]:
# 全局上下文，仅在Python shell中使用
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
sess.close() # 这个全局上下文需要手动关闭

## Managing Graphs

In [None]:
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

In [None]:
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
x2.graph is graph

In [None]:
x2.graph is tf.get_default_graph()

In [None]:
tf.reset_default_graph()

In [None]:
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3
with tf.Session() as sess:
    # 分开计算时，x会被计算两次
    print(y.eval())
    print(z.eval())

In [None]:
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z]) # 这种情况下，x只会被计算一次
    print(y_val)
    print(z_val)

## Linear Regression with Tensorflow

In [None]:
import numpy as np
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data] # add bias feature x0
housing_target = housing.target.reshape(-1, 1);

In [None]:
# 使用Normal Equation进行计算，因此只需要一条公式：theta = (X.transpose * X).inverse * X.transpose * y
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing_target, dtype=tf.float32, name='y')
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

with tf.Session() as sess:
    theta_value = theta.eval()

In [None]:
theta_value

In [None]:
# calculate mean squared error
y_pred = np.matmul(housing_data_plus_bias, theta_value)
mse = np.mean(np.square(y_pred - housing_target))
print("MSE:", mse)

使用TensorFlow计算的好处就是，可以利用GPU进行计算加速（如果支持GPU计算的话）

## Implementing Gradient Descent

In [None]:
from sklearn.preprocessing import StandardScaler
std_scaler = StandardScaler()
scaled_housing_data = std_scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

In [None]:
%%time
n_epochs = 1000
learning_rate = 0.01
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing_target, dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
gradient = 2 / m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradient)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print('Epoch', epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
print(best_theta)

In [None]:
# calculate mean squared error
y_pred = np.matmul(scaled_housing_data_plus_bias, best_theta)
mse_value = np.mean(np.square(y_pred - housing_target))
print("MSE:", mse_value)

### Using autodiff

In [None]:
%%time
theta2 = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta2')
gradient = tf.gradients(mse, [theta2])[0] # 使用tf自带函数进行导数计算

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print('Epoch', epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
print(best_theta)

### Using an Optimizer

In [None]:
%%time
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print('Epoch', epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
print(best_theta)

In [None]:
%%time
n_epochs = 1000
learning_rate = 0.01
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing_target, dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
gradient = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

# momentum 动量？
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9) # 替换了这里，优化
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print('Epoch', epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
print(best_theta)

### TensorFlow for Gradient Descent
1. 声明Variable变量：包括X, y, theta
2. 构建Cost Function，这里是MSE公式
3. 计算gradient，这里使用tf.gradients
4. 构建training_op，也就是每次迭代的更新点：theta = theta - gradient * learning_rate
5. 获取Variable初始化方法，tf.global_variables_initializer()
6. 运行Session，记得先初始化，然后用loop进行多次迭代更新theta值

## Feeding Data to the Training Algorithm

In [None]:
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})
    B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})
print(B_val_1)
print(B_val_2)

#### Difference between Variable and Placeholder:
https://stackoverflow.com/questions/36693740/whats-the-difference-between-tf-placeholder-and-tf-variable

In [None]:
%%time
# Mini-batch Gradient Descent
X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')
# X = tf.Variable(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
# y = tf.Variable(housing_target, dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], 1.0 -1.0), name="theta");
y_pred = tf.matmul(X, theta, name="predictions")
mse = tf.reduce_mean(tf.square(y_pred - y), name='mse')
gradient = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradient)

n_epochs=3
batch_size=100 # 每次获取的数据块大小
n_batchs = int(np.floor(m / batch_size))

init = tf.global_variables_initializer()

last_epoch = -1
random_indices = np.random.permutation(m)
def fetch_batch(epoch, batch_index, batch_size):
#     print('fetch_batch', epoch, batch_index, batch_size)
    global last_epoch
    global random_indices
    if last_epoch != epoch:
        random_indices = np.random.permutation(m)
#         print('Change random indices')
        last_epoch = epoch
    indices = random_indices[batch_index * batch_size : (batch_index + 1) * batch_size]
    # get data
    return scaled_housing_data_plus_bias[indices, :], housing_target[indices, :]

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # get batch data            
        for batch_index in range(n_batchs):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
#             print("X_batch:", X_batch.shape, "y_batch:", y_batch.shape)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        print('Epoch', epoch, "MSE =", mse.eval(feed_dict={X: X_batch, y: y_batch}))
        # print info
        best_theta = theta.eval()
print(best_theta)

## Saving and Restoring Models

In [None]:
# 每个训练节点都保存模型
n_epochs = 1000
learning_rate = 0.01
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing_target, dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
gradient = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradient)

init = tf.global_variables_initializer()

saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print('Epoch', epoch, "MSE =", mse.eval())
            save_path = saver.save(sess, '/Users/chance/machine_learning/hands_on_machine_learning/tmp/my_model.ckpt')
        sess.run(training_op)
    best_theta = theta.eval()
    save_path = saver.save(sess, '/Users/chance/machine_learning/hands_on_machine_learning/tmp/my_final.ckpt')
print(best_theta)

In [None]:
# restore model
# 这里恢复的时候不需要调用global_variable_initializer
# 模型节点的前置声明还是需要的，
with tf.Session() as sess:
    saver.restore(sess, '/Users/chance/machine_learning/hands_on_machine_learning/tmp/my_final.ckpt')
    print(theta.eval())

## Visualizing the Graph and Training Curves Using TensorBoard

In [None]:
# Mini-batch Gradient Descent

n_epochs = 3
learning_rate = 0.01
batch_size=100 # 每次获取的数据块大小
n_batchs = int(np.floor(m / batch_size))

X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], 1.0 -1.0), name="theta");
y_pred = tf.matmul(X, theta, name="predictions")
with tf.name_scope("loss") as scope: # 使用scope之后可以把scope里面的操作进行归类
    mse = tf.reduce_mean(tf.square(y_pred - y), name='mse')
    print("Test scope name:", mse.op.name)
gradient = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradient)

init = tf.global_variables_initializer()

last_epoch = -1
random_indices = np.random.permutation(m)
def fetch_batch(epoch, batch_index, batch_size):
    global last_epoch
    global random_indices
    if last_epoch != epoch:
        random_indices = np.random.permutation(m)
        last_epoch = epoch
    indices = random_indices[batch_index * batch_size : (batch_index + 1) * batch_size]
    # get data
    return scaled_housing_data_plus_bias[indices, :], housing_target[indices, :]

from datetime import datetime
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_dir = "tf_logs"
logdir = "{}/run-{}".format(root_dir, now) # 这里的地址是相对路径的，跟前面的模型保存还不一样...?
mse_summary = tf.summary.scalar("MSE", mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # get batch data            
        for batch_index in range(n_batchs):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)  
            # 添加文件日志信息
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batchs + batch_index
#                 print("step:", step)
                file_writer.add_summary(summary_str, step)
            
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        print('Epoch', epoch, "MSE =", mse.eval(feed_dict={X: X_batch, y: y_batch}))
        # print info
        best_theta = theta.eval()
file_writer.flush()
file_writer.close()        
print(best_theta)

## Moduarity

In [None]:
# 重复的代码
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
w1 = tf.Variable(tf.random_normal((n_features, 1)), name="weights1")
w2 = tf.Variable(tf.random_normal((n_features, 1)), name="weights2")
b1 = tf.Variable(0.0, name="bias1")
b2 = tf.Variable(0.0, name="bias2")

z1 = tf.add(tf.matmul(X, w1), b1, name="z1")
z2 = tf.add(tf.matmul(X, w2), b2, name="z1")

relu1 = tf.maximum(z1, 0.0, name="relu1")
relu2 = tf.maximum(z2, 0.0, name="relu1")

output = tf.add(relu1, relu2, name="output")

In [None]:
# 模块化 
def relu(X):
    # 也可使用scope封装，graph看起来更有条理
    # with tf.name_scope('relu'):
    #    ...
    w_shape = (int(X.get_shape()[1]), 1) # = (n_features, 1)
    w = tf.Variable(tf.random_normal(w_shape), name="weights")
    b = tf.Variable(0.0, name="bias")
    z = tf.add(tf.matmul(X, w), b, name="z")
    return tf.maximum(z, 0, name="relu")

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for i in range(5)] # 这里使用python的语法创建5个相同的节点
output = tf.add_n(relus, name="output")


## Sharing Variables

In [None]:
# 方法1：通过函数参数传递共享参数

def relu(X, threshold):
    with tf.name_scope('relu'):
        w_shape = (int(X.get_shape()[1]), 1) # = (n_features, 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, threshold, name="relu")

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
threshold = tf.Variable(0.0, name="threshold")
relus = [relu(X, threshold) for i in range(5)] # 这里使用python的语法创建5个相同的节点
output = tf.add_n(relus, name="output")


In [None]:
# 方法2：在函数内部创建参数

def relu(X, threshold):
    with tf.name_scope('relu'):
        # 这里通过添加python function参数的方式实现...
        if not hasattr(relu, 'threshold'):
            relu.threshold = tf.Variable(0.0, name="threshold")
        w_shape = (int(X.get_shape()[1]), 1) # = (n_features, 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, relu.threshold, name="relu")

In [3]:
# variable scope
# 这个方式可以防止threshold参数被重用，就是说在‘relu’这个域名下只能有一个threshold参数，重复调用会引发Exception
with tf.variable_scope('relu'):
    threshold = tf.get_variable('threshold', shape=(), initializer=tf.constant_initializer(0.0))

In [None]:
# 引起Exception
with tf.variable_scope('relu'):
    threshold = tf.get_variable('threshold', shape=(), initializer=tf.constant_initializer(0.0))

In [9]:
# 启用复用功能
# 注意：这里的threshold必须在之前已经被初始化过的，否则会引发Exception，也就是说不会自动初始化
with tf.variable_scope('relu') as scope:
    scope.reuse_variables()
    threshold = tf.get_variable('threshold')
    threshold.value = 3

In [10]:
with tf.variable_scope('relu', reuse=True):
    threshold = tf.get_variable('threshold')
    print(threshold.value)

3


注意：一旦启用了resue之后，便无法关闭，在这个scope下的get_variable都会自动reuse

In [11]:
# 方法3： 使用variable_scope
def relu(X):
    with tf.variable_scope('relu', reuse=True):
        threshold = tf.get_variable('threshold_')
        w_shape = (int(X.get_shape()[1]), 1) # = (n_features, 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, threshold, name="relu")

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
with tf.variable_scope('relu'):
    threshold = tf.get_variable('threshold_', shape=(), initializer=tf.constant_initializer(0.0))
relus = [relu(X) for i in range(5)] # 这里使用python的语法创建5个相同的节点
output = tf.add_n(relus, name="output")