In [1]:
import tensorflow as tf
import numpy as np

# 建立神经网络
## 1.准备数据

In [2]:
# Synthetic regression data: y = x^2 - 0.5 plus Gaussian noise.
# A dedicated seeded generator makes the noise (and therefore the dataset)
# identical after every kernel restart.
rng=np.random.RandomState(0)
x_data=np.linspace(-1,1,300).reshape(-1,1)  # 300 evenly spaced points as a (300, 1) column
noise=rng.normal(0,0.05,x_data.shape)       # zero-mean noise with standard deviation 0.05
y_data=np.square(x_data)-0.5+noise

## 2. 建立一个层

In [3]:
def add_layer(inputs,in_size,out_size,activation_function=None):
    """Append a fully connected layer to ``inputs`` and return its output.

    Args:
        inputs: tensor that is matrix-multiplied with the layer weights.
        in_size: number of rows of the weight matrix.
        out_size: number of columns of the weight matrix and of the bias row.
        activation_function: optional callable applied to ``inputs @ W + b``;
            when None the affine result is returned unchanged.

    Returns:
        The (optionally activated) result of ``inputs @ W + b``.
    """
    # Weights start from random-normal draws; biases start at 0.1 rather than 0.
    layer_weights = tf.Variable(tf.random_normal((in_size, out_size)))
    layer_biases = tf.Variable(tf.zeros(shape=(1, out_size)) + 0.1)
    affine = tf.matmul(inputs, layer_weights) + layer_biases
    if activation_function is not None:
        return activation_function(affine)
    return affine

## 3. 建立所有层

In [4]:
# Placeholders for a batch of scalar inputs and targets; the batch size is left open (None).
xs = tf.placeholder(tf.float32, shape=[None, 1])
ys = tf.placeholder(tf.float32, shape=[None, 1])

# 1 -> 10 hidden layer with ReLU, followed by a 10 -> 1 linear output layer.
layer_1 = add_layer(xs, in_size=1, out_size=10, activation_function=tf.nn.relu)
prediction = add_layer(layer_1, in_size=10, out_size=1)

## 3.损失函数

In [5]:
# Mean squared error: sum the squared residuals along axis 1, then average the result.
# `axis` replaces the deprecated `reduction_indices` alias.
squared_error = tf.square(ys - prediction)
loss = tf.reduce_mean(tf.reduce_sum(squared_error, axis=1))
# Plain gradient descent with a fixed learning rate of 0.1.
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

## 4. 训练

In [6]:
# Open the session, then build and run the op that initialises every variable.
# The session is left open on purpose: the training cell below reuses it.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [9]:
# The whole dataset is fed on every step, so the feed dict is built once.
train_feed = {xs: x_data, ys: y_data}
for step_idx in range(1000):
    sess.run(train_step, feed_dict=train_feed)
    # Report the loss at steps 0, 300, 600 and 900.
    if step_idx % 300 == 0:
        print(sess.run(loss, feed_dict=train_feed))

0.00325003
0.00310567
0.00298206
0.00288067


# 损失函数
#### 1.交叉熵
$H(p,q)=-\sum p(x)\log q(x)$

In [None]:
# y_ holds the correct results, y the predictions.
# Clamp the predictions to [1e-10, 1.0] so tf.log never receives 0.
clipped_y = tf.clip_by_value(y, 1e-10, 1.0)
# tf.reduce_mean with no axis argument averages over every element of the tensor.
cross_entropy = -tf.reduce_mean(y_ * tf.log(clipped_y))

In [None]:
# TensorFlow bundles softmax and cross-entropy into a single op.
# The arguments must be passed by keyword (positional use is rejected in TF >= 1.0):
# y_ holds the labels, and y must be the raw, pre-softmax logits because the op
# applies softmax itself.
cross_entropy=tf.nn.softmax_cross_entropy_with_logits(labels=y_,logits=y)

#### 2.MSE
$MSE(y,y')=\dfrac{\sum (y-y')^2}{n}$

In [None]:
# Mean squared error between the correct results y_ and the predictions y.
residual = y_ - y
mse = tf.reduce_mean(tf.square(residual))

#### 3.自定义
例如，预测销量时，多预测一个损失1元，少预测1个损失10元。  
$Loss(y,y')=\sum f(y_i,y_i')$,
$f(x,y)=\left\{\begin{array}{ccc}a(x-y)&x>y\\
b(y-x)&x\leq y\end{array}\right.$

In [None]:
# Asymmetric loss: charge `a` per unit where v1 > v2 and `b` per unit otherwise.
# tf.where replaces tf.select, which was removed in TensorFlow 1.0.
loss=tf.reduce_sum(tf.where(tf.greater(v1,v2),(v1-v2)*a,(v2-v1)*b))


# 学习率
decayed_learning_rate=learning_rate * decay_rate ** (global_step/decay_steps)
```
tf.train.exponential_decay# 指数下降法减小学习率
learning_rate=tf.train.exponential_decay(0.01,global_step=10000,decay_steps=100,decay_rate=0.96,staircase=True)
```
staircase=True时，(global_step/decay_steps)会转化成整数，导致学习率阶梯下降

In [2]:
import tensorflow as tf
TRAINING_STEPS = 100
# The step counter is bookkeeping, not a model parameter, so keep it out of the
# trainable-variables collection.
global_step = tf.Variable(0, trainable=False)
# Start at 0.1 and multiply by 0.96 after every step (decay_steps=1, staircase=True).
LEARNING_RATE = tf.train.exponential_decay(0.1, global_step, 1, 0.96, staircase=True)

# Minimise y = x^2 starting from x = 5.
x = tf.Variable(tf.constant(5, dtype=tf.float32), name="x")
y = tf.square(x)
# Passing global_step makes every training step increment it, which drives the decay.
train_op = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(y, global_step=global_step)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(TRAINING_STEPS):
        sess.run(train_op)
        if i % 10 == 0:
            # Fetch both values in a single call so they come from the same state.
            x_value, LEARNING_RATE_value = sess.run([x, LEARNING_RATE])
            print ("After %s iteration(s): x%s is %f, learning rate is %f."% (i+1, i+1, x_value, LEARNING_RATE_value))

After 1 iteration(s): x1 is 4.000000, learning rate is 0.096000.
After 11 iteration(s): x11 is 0.690561, learning rate is 0.063824.
After 21 iteration(s): x21 is 0.222583, learning rate is 0.042432.
After 31 iteration(s): x31 is 0.106405, learning rate is 0.028210.
After 41 iteration(s): x41 is 0.065548, learning rate is 0.018755.
After 51 iteration(s): x51 is 0.047625, learning rate is 0.012469.
After 61 iteration(s): x61 is 0.038558, learning rate is 0.008290.
After 71 iteration(s): x71 is 0.033523, learning rate is 0.005511.
After 81 iteration(s): x81 is 0.030553, learning rate is 0.003664.
After 91 iteration(s): x91 is 0.028727, learning rate is 0.002436.


# 过拟合
在 cost function 上加入 L1 正则化或 L2 正则化。  
其中 L1 正则化可以将某些参数变成 0，从而使参数稀疏。  
L2 正则化不会使参数稀疏。  
(原因参见 lasso 和 ridge regression)


In [None]:
# Data-fit term (mean squared error) plus an L2 penalty on w with coefficient alpha.
mse_term = tf.reduce_mean(tf.square(y_ - y))
l2_penalty = tf.contrib.layers.l2_regularizer(alpha)(w)
loss = mse_term + l2_penalty

# 滑动平均

#### 1. 定义变量及滑动平均类

In [3]:
import tensorflow as tf
# Variable whose moving average is tracked; it starts at 0.
v1 = tf.Variable(0, dtype=tf.float32)
# Update counter handed to the averager; excluded from training.
step = tf.Variable(0, trainable=False)
# Averager with a nominal decay of 0.99, driven by the step counter.
ema = tf.train.ExponentialMovingAverage(decay=0.99, num_updates=step)
# Running this op refreshes the moving average kept for v1.
maintain_averages_op = ema.apply(var_list=[v1])

#### 2. 查看不同迭代中变量取值的变化。
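提供了 num_updates(这里是 step)时，每次更新实际使用的衰减率为  
$\min (decay,\dfrac{1+num_{updates}}{10+num_{updates}})$，  
滑动平均值按 $shadow = decay_{实际}\times shadow+(1-decay_{实际})\times variable$ 更新。  
一般情况下，decay设定为一个非常接近1的数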

In [4]:
with tf.Session() as sess:
    # Initialise every variable, including the shadow variable created by ema.apply.
    sess.run(tf.global_variables_initializer())

    # v1 together with its moving average; this fetch list is reused after each update.
    value_and_average = [v1, ema.average(v1)]
    print(sess.run(value_and_average))

    # Set v1 to 5. With step == 0 the effective decay is min(0.99, 1/10) = 0.1,
    # so the average becomes 0.1*0 + 0.9*5 = 4.5.
    sess.run(tf.assign(v1, 5))
    sess.run(maintain_averages_op)
    print(sess.run(value_and_average))

    # Set step to 10000 and v1 to 10. The effective decay is now 0.99,
    # so the average becomes 0.99*4.5 + 0.01*10 = 4.555.
    sess.run(tf.assign(step, 10000))
    sess.run(tf.assign(v1, 10))
    sess.run(maintain_averages_op)
    print(sess.run(value_and_average))

    # Apply the update once more with v1 unchanged: 0.99*4.555 + 0.01*10 = 4.60945.
    sess.run(maintain_averages_op)
    print(sess.run(value_and_average))


[0.0, 0.0]
[5.0, 4.5]
[10.0, 4.5549998]
[10.0, 4.6094499]


# 正则化

- 加入collection

In [None]:
# wl is the coefficient applied to the L2 loss of `var`.
l2_term = tf.nn.l2_loss(var)
weight_loss = tf.multiply(l2_term, wl, name='weight_loss')
# Register the weighted penalty in the 'losses' collection.
tf.add_to_collection('losses', weight_loss)

- 正则化的和

In [None]:
# Sum every tensor registered in the 'losses' collection into one op.
collected_losses = tf.get_collection('losses')
tf.add_n(collected_losses, name='total_loss')

把entropy也加入'losses'使代码简洁一些

In [None]:
# Register the cross-entropy term alongside the penalties already in 'losses' ...
tf.add_to_collection('losses', cross_entropy)
# ... then sum the whole collection into one op.
collected_losses = tf.get_collection('losses')
tf.add_n(collected_losses, name='total_loss')