In [1]:
import numpy as np
import tensorflow as tf
from sklearn.cross_validation import train_test_split
from sklearn.datasets import load_digits
from sklearn.preprocessing import LabelBinarizer

  from ._conv import register_converters as _register_converters


In [2]:
# Fixed seed so the train/test split is identical on every
# "Restart Kernel -> Run All"; without it each run evaluates on different data.
SPLIT_SEED = 42

digits = load_digits()
X = digits.data
# Binarise the integer class labels into one indicator column per class.
y = LabelBinarizer().fit_transform(digits.target)
# Hold out 30% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SPLIT_SEED)

In [3]:
# Width of the input and label placeholders.
input_size, output_size = 64, 10
# Number of units in the first and second hidden layer.
hidden_units1, hidden_units2 = 100, 50
# When True, the input placeholder is batch-normalised before the first layer.
norm = False

In [4]:
def _batch_normalize(pre_activation, out_size, layer_name):
    """Batch-normalise ``pre_activation`` with a learnable scale and offset.

    While the module-level ``on_train`` tensor is true, the statistics of the
    current batch are used and folded into an exponential moving average;
    otherwise the moving averages are used.
    """
    with tf.name_scope("batch_normalization"):
        # Mean and variance over the first (batch) axis only. For inputs with
        # more axes (e.g. image batches) one would pass axes=[0, 1, 2].
        batch_mean, batch_var = tf.nn.moments(pre_activation, axes=[0])

        # gamma and beta of Z = gamma * z + beta.
        with tf.name_scope("gamma"):
            gamma = tf.Variable(tf.ones([out_size]))
            tf.summary.histogram(layer_name + "/gamma", gamma)
        with tf.name_scope("beta"):
            beta = tf.Variable(tf.zeros([out_size]))
            tf.summary.histogram(layer_name + "/beta", beta)

        # Exponential moving average of the batch statistics.
        moving_avg = tf.train.ExponentialMovingAverage(decay=0.5)

        def batch_stats_with_update():
            # Update the moving averages before handing out the batch stats.
            update_op = moving_avg.apply([batch_mean, batch_var])
            with tf.control_dependencies([update_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        def moving_stats():
            return moving_avg.average(batch_mean), moving_avg.average(batch_var)

        mean, var = tf.cond(on_train, batch_stats_with_update, moving_stats)

        # Equivalent to (z - mean) / sqrt(var + eps) * gamma + beta; the small
        # constant guards against division by zero.
        return tf.nn.batch_normalization(pre_activation, mean, var, beta, gamma, 0.001)


def add_layer(inputs, in_size, out_size, n_layer, keep_prob, activation_function=None, BN=False):
    """Add one fully connected layer to the graph and return its output tensor.

    Args:
        inputs: tensor multiplied on the left of the weight matrix.
        in_size: number of rows of the weight matrix (input width).
        out_size: number of columns of the weight matrix (output width).
        n_layer: label used to build the ``layer_<n_layer>`` name scope and
            the summary tags.
        keep_prob: second argument of ``tf.nn.dropout``, applied to the
            pre-activation.
        activation_function: callable applied to the pre-activation; when
            None the pre-activation is returned unchanged.
        BN: when true, batch-normalise the pre-activation (after dropout).
            This path reads the module-level ``on_train`` tensor.

    Returns:
        The layer output tensor. Histogram summaries are registered for the
        weights, the biases, the BN parameters (if any) and the output.
    """
    layer_name = 'layer_%s' % n_layer
    # Group everything under one node per layer in the graph visualisation.
    with tf.name_scope(layer_name):
        with tf.name_scope("weights"):
            # Weight matrix: rows = input width, columns = output width.
            weight_matrix = tf.Variable(tf.random_normal([in_size, out_size]), name="W")
            tf.summary.histogram(layer_name + "/weights", weight_matrix)

        with tf.name_scope("biases"):
            # Bias row vector: one row, out_size columns, initialised to 0.1.
            bias_row = tf.Variable(tf.zeros([1, out_size]) + 0.1, name="b")
            tf.summary.histogram(layer_name + "/biases", bias_row)

        with tf.name_scope("Wx_plus_b"):
            # Computed as xW (inputs on the left), then passed through dropout.
            pre_activation = tf.nn.dropout(tf.matmul(inputs, weight_matrix) + bias_row, keep_prob)

        if BN:
            pre_activation = _batch_normalize(pre_activation, out_size, layer_name)

        outputs = pre_activation if activation_function is None else activation_function(pre_activation)
        tf.summary.histogram(layer_name + "/outputs", outputs)
        return outputs

def compute_accuracy(v_xs, v_ys):
    """Return the fraction of samples whose predicted class matches the label.

    The prediction tensor is evaluated once in inference mode (both dropout
    keep-probabilities set to 1, ``on_train`` False). The comparison is then
    done in NumPy, so calling this function repeatedly does not add new ops
    to the TensorFlow graph.

    Args:
        v_xs: input samples fed to the ``xs`` placeholder.
        v_ys: array with one row per sample; the position of the row maximum
            is taken as the true class.

    Returns:
        Mean accuracy as a ``numpy.float32`` scalar.
    """
    eval_feed = {xs: v_xs, keep_prob_l1: 1, keep_prob_l2: 1, on_train: False}
    # One row of class probabilities per sample.
    y_pre = sess.run(prediction, feed_dict=eval_feed)
    # A sample is correct when its most probable class sits at the same
    # position as the maximum of its label row.
    hits = np.argmax(y_pre, 1) == np.argmax(v_ys, 1)
    return np.float32(np.mean(hits))



In [5]:
# Dropout keep-probabilities for the two hidden layers, fed at run time.
with tf.name_scope("dropout_rate"):
    keep_prob_l1 = tf.placeholder(dtype=tf.float32, name="dropout_rate_l1")
    keep_prob_l2 = tf.placeholder(dtype=tf.float32, name="dropout_rate_l2")

with tf.name_scope("inputs"):
    # Inputs: None leaves the number of samples open; each sample has
    # input_size features (64 = 8x8 pixels).
    xs = tf.placeholder(dtype=tf.float32, shape=[None, input_size], name="x_input")
    # Labels: None leaves the number of samples open; output_size columns
    # (10-class problem).
    ys = tf.placeholder(dtype=tf.float32, shape=[None, output_size], name="y_input")

# Boolean switch read by the batch-normalisation branch of add_layer.
on_train = tf.placeholder(dtype=tf.bool, name="on_train")

if norm:
    # Optional batch normalisation of the raw inputs over the batch axis.
    # NOTE(review): this rebinds `xs` to the normalised tensor, so later
    # `feed_dict={xs: ...}` entries target that tensor rather than the
    # placeholder -- presumably this bypasses the normalisation; confirm
    # before enabling `norm`.
    fc_mean, fc_var = tf.nn.moments(xs, axes=[0])
    scale = tf.Variable(tf.ones([input_size]))
    shift = tf.Variable(tf.zeros([input_size]))
    epsilon = 0.001
    xs = tf.nn.batch_normalization(xs, fc_mean, fc_var, shift, scale, epsilon)



In [6]:
# Hidden layers: sigmoid units with dropout and batch normalisation.
# Layer 1 takes the input_size features; layer 2 takes layer 1's output.
l1 = add_layer(xs, input_size, hidden_units1, n_layer='l1', keep_prob=keep_prob_l1,
               activation_function=tf.nn.sigmoid, BN=True)
l2 = add_layer(l1, hidden_units1, hidden_units2, n_layer='l2', keep_prob=keep_prob_l2,
               activation_function=tf.nn.sigmoid, BN=True)
# Output layer: takes the hidden_units2 outputs of layer 2 and produces
# output_size softmax probabilities. keep_prob=1 disables dropout here.
prediction = add_layer(l2, hidden_units2, output_size, n_layer='predict', keep_prob=1,
                       activation_function=tf.nn.softmax)

# Added inside the log so that a softmax output that underflows to 0 gives a
# large finite loss instead of -inf (and 0 * -inf = NaN for the other classes).
LOG_EPSILON = 1e-10

# The misspelled "corss_entropy" scope/tag is kept on purpose so that new
# event files line up with logs written by earlier runs.
with tf.name_scope("corss_entropy"):
    corss_entropy = tf.reduce_mean(
        -tf.reduce_sum(ys * tf.log(prediction + LOG_EPSILON), reduction_indices=[1]))
    tf.summary.scalar("corss_entropy", corss_entropy)

with tf.name_scope("train"):
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(corss_entropy)

sess = tf.Session()

# Merge every summary registered so far into a single op.
merged = tf.summary.merge_all()

# Write the graph structure to disk; train and test get separate log dirs.
train_writer = tf.summary.FileWriter("logs/DNN/train", sess.graph)
test_writer = tf.summary.FileWriter("logs/DNN/test", sess.graph)
# To inspect: activate the environment in a terminal, run
#   tensorboard --logdir logs
# and open localhost:6006.



In [7]:
N_STEPS = 10000    # number of full-batch gradient steps
EVAL_EVERY = 500   # evaluate and write summaries every this many steps

# Everything defined through a placeholder has to be supplied via feed_dict,
# including the dropout keep-probabilities.
train_feed = {xs: X_train, ys: y_train,
              keep_prob_l1: 0.5, keep_prob_l2: 0.3, on_train: True}
# For evaluation nothing is dropped (keep_prob = 1) and batch normalisation
# uses its moving averages (on_train = False).
eval_train_feed = {xs: X_train, ys: y_train,
                   keep_prob_l1: 1, keep_prob_l2: 1, on_train: False}
eval_test_feed = {xs: X_test, ys: y_test,
                  keep_prob_l1: 1, keep_prob_l2: 1, on_train: False}

init = tf.global_variables_initializer()
sess.run(init)
for i in range(N_STEPS):
    sess.run(train_step, feed_dict=train_feed)

    if i % EVAL_EVERY == 0:
        print(compute_accuracy(X_test, y_test))

        train_result = sess.run(merged, feed_dict=eval_train_feed)
        test_result = sess.run(merged, feed_dict=eval_test_feed)

        train_writer.add_summary(train_result, i)
        test_writer.add_summary(test_result, i)

# Push buffered events to disk so the last summaries are not lost if the
# kernel stops before the writers' periodic flush.
train_writer.flush()
test_writer.flush()

0.062962964
0.8388889
0.8981481
0.92407405
0.93333334
0.9388889
0.94814813
0.95185184
0.95185184
0.95555556
0.95555556
0.9574074
0.9592593
0.9611111
0.9592593
0.9592593
0.962963
0.96481484
0.96481484
0.96481484
