In [1]:
import tensorflow as tf
import numpy as np

  from ._conv import register_converters as _register_converters


### v1 : 手工定义w, b
### 这里使用Adam，因为使用GradientDescentOptimizer会梯度爆炸

In [11]:
def linear_regression_v1(train_x,train_y,epoch=20000,lr=0.001):
    with tf.variable_scope("v1", reuse=tf.AUTO_REUSE) as scope:
        x = tf.placeholder(tf.float32)
        y = tf.placeholder(tf.float32)
        w = tf.Variable(tf.random_normal([1]))
        b = tf.Variable(tf.random_normal([1]))

        pred = tf.add(tf.multiply(x, w), b)
        loss = tf.reduce_sum(tf.pow(pred - y, 2))

        optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(1,epoch+1):
                sess.run(optimizer, {x:train_x, y:train_y})
                if i % 2000 == 0:
                    ww, bb, ll = sess.run([w, b, loss], {x:train_x, y:train_y})
#                     print("%s, %s, %s, %s" % (i, ww, bb, ll))
                    print("%s, %s" % (i, ll))

### v2 : 使用梯度裁剪解决SGD梯度爆炸的问题

In [15]:
def linear_regression_v2(train_x,train_y,epoch=20000,lr=0.001):
    with tf.variable_scope("v2", reuse=tf.AUTO_REUSE) as scope:
        x = tf.placeholder(tf.float32, [100,1])
        y = tf.placeholder(tf.float32, [100,1])
        w = tf.Variable(tf.random_normal([1]))
        b = tf.Variable(tf.random_normal([1]))

        pred = tf.add(tf.multiply(x, w), b)
        loss = tf.reduce_sum(tf.pow(pred - y, 2))
        """ 梯度裁剪 """
        op = tf.train.GradientDescentOptimizer(lr)
        grads_and_vars = op.compute_gradients(loss)
        grad, variables = zip(*grads_and_vars)
        clipped_grad, glob_norm = tf.clip_by_global_norm(grad, clip_norm=5)
        optimizer = op.apply_gradients(zip(clipped_grad, variables))

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(1,epoch+1):
                sess.run(optimizer, {x:train_x, y:train_y})
                if i % 2000 == 0:
                    ww, bb, ll = sess.run([w, b, loss], {x:train_x, y:train_y})
                    print("%s, %s" % (i, ll))
#                     print("%s, %s, %s, %s" % (i, ww, bb, ll))

### v3：使用dense构造全连接层

In [9]:
def linear_regression_v3(train_x,train_y,epoch=20000,lr=0.001):
    with tf.variable_scope("v2", reuse=tf.AUTO_REUSE) as scope:
        x = tf.placeholder(tf.float32, [100,1])
        y = tf.placeholder(tf.float32, [100,1])
        pred = tf.layers.dense(x, 1, use_bias=True)
        loss = tf.reduce_sum(tf.pow(pred - y, 2),name="loss")

        optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(1,epoch+1):
                sess.run(optimizer, {x:train_x, y:train_y})
                if i % 2000 == 0:
                    ll = sess.run([loss], {x:train_x, y:train_y})
                    print("%s, %s" % (i, ll))
#             gv = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='v2')
#             for var in gv:
#                 print("%s : %s" % (var.name, sess.run(var, {x:train_x, y:train_y})))

In [5]:
train_x = np.arange(100).reshape([100,1])
train_y = train_x * 2.0 - 1.5 + 0.05 * np.random.randn()

In [13]:
linear_regression_v1(train_x,train_y)

2000, 1038.709
4000, 110.91292
6000, 85.89453
8000, 44.006004
10000, 8.602722
12000, 0.15436323
14000, 2.8198938e-06
16000, 6.949996e-12
18000, 6.949996e-12
20000, 6.949996e-12


In [16]:
linear_regression_v2(train_x,train_y)

2000, 49.28425
4000, 27.510662
6000, 15.776136
8000, 9.45639
10000, 6.044359
12000, 4.2093344
14000, 3.2350032
16000, 2.6731892
18000, 2.3982048
20000, 2.268446


In [10]:
linear_regression_v3(train_x,train_y)

2000, [1128122.8]
4000, [116878.73]
6000, [1304.0945]
8000, [594.8844]
10000, [494.91302]
12000, [317.5016]
14000, [131.33923]
16000, [26.674183]
18000, [0.9867292]
20000, [0.00019921354]
