# 神经网络

## 使用 TensorFlow 高级 API 训练多层感知机(MLP)

In [1]:
import numpy as np
import tensorflow as tf

# Fetch the MNIST train/test split through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Cast the images to float32, divide by 255 and flatten each one to 28*28 values.
X_train = (X_train.astype(np.float32) / 255.0).reshape(-1, 28 * 28)
X_test = (X_test.astype(np.float32) / 255.0).reshape(-1, 28 * 28)

# Labels are stored as int32.
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

# Hold out the first 5000 training instances as a validation set;
# the remainder stays as the training set.
X_valid, y_valid = X_train[:5000], y_train[:5000]
X_train, y_train = X_train[5000:], y_train[5000:]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [2]:
# A single numeric feature named "X" holding the 28*28 flattened pixel values.
feature_cols = [tf.feature_column.numeric_column("X", shape=[28 * 28])]

# Fully connected classifier: hidden layers of 300 and 100 units, 10 classes.
dnn_clf = tf.estimator.DNNClassifier(
    hidden_units=[300, 100],
    n_classes=10,
    feature_columns=feature_cols,
)

# Training input: shuffled mini-batches of 50 instances, 40 epochs.
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_train},
    y=y_train,
    num_epochs=40,
    batch_size=50,
    shuffle=True,
)
dnn_clf.train(input_fn=input_fn)

# Evaluation input: the test set, fed without shuffling.
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_test},
    y=y_test,
    shuffle=False,
)
eval_results = dnn_clf.evaluate(input_fn=test_input_fn)

# Last expression of the cell: display the evaluation metrics.
eval_results

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpu6bme28g', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff0d9866780>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
To construct input pipelines, use the `tf.

{'accuracy': 0.9772,
 'average_loss': 0.11150786,
 'loss': 14.114919,
 'global_step': 44000}

## 使用tensorflow低级API训练DNN

使用小批量(mini-batch)梯度下降法在 MNIST 数据集上训练。

### 建模阶段

In [3]:
import numpy as np
import tensorflow as tf
from utils import (reset_tf_graph, show_tf_graph)

# Project helper imported from utils; presumably clears the default graph so
# the cell can be re-run — confirm against utils.reset_tf_graph.
reset_tf_graph()

n_inputs = 28 * 28  # one input per pixel of a 28x28 MNIST image
n_hidden1 = 300     # number of neurons in the first hidden layer
n_hidden2 = 100     # number of neurons in the second hidden layer
n_outputs = 10      # one output per digit class

# Placeholders for the training data.
# X: axis 0 is the mini-batch size and is left as None so that any number of
# instances can be fed; axis 1 holds the 28*28 pixel values.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# y: one integer label per instance in the mini-batch. The shape is written as
# the 1-tuple (None,): without the trailing comma, `(None)` is simply `None`,
# which leaves the placeholder's shape completely unspecified.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

In [4]:
def neuron_layer(X, n_neurons, name, activation=None):
    """Build one fully connected layer and return its output tensor.

    Args:
        X: input tensor of this layer, laid out as (n_instances, n_features).
        n_neurons: number of neurons (output columns) of the layer.
        name: variable scope under which the layer's variables are created.
        activation: None for a linear layer, the string "relu", or any
            callable that maps the pre-activation tensor to the output tensor
            (for example tf.nn.relu).

    Returns:
        A tensor of shape (n_instances, n_neurons).

    Raises:
        ValueError: if `activation` is neither None, "relu" nor a callable.
            Unrecognised values used to be ignored silently, which produced a
            linear layer without any warning.
    """
    with tf.variable_scope(name):
        # Number of input features = size of axis 1 of the incoming tensor.
        n_inputs = int(X.get_shape()[1])
        # Weights start from a truncated normal distribution whose standard
        # deviation shrinks as the number of inputs grows.
        stddev = 2 / np.sqrt(n_inputs)
        w_init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(w_init, name="weights")  # (n_features, n_neurons)
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")  # one bias per neuron
        z = tf.matmul(X, W) + b  # (n_instances, n_neurons)

        if activation is None:
            return z
        if callable(activation):
            return activation(z)
        if activation == "relu":
            return tf.nn.relu(z)
        raise ValueError("Unsupported activation: %r" % (activation,))

In [5]:
# Assemble the network: two ReLU hidden layers followed by a linear output layer.
# NOTE(review): the scope name "outpus" looks like a typo for "outputs". It is
# kept unchanged because renaming it would change the variable names stored in
# the graph and in saved checkpoints.
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, name="hidden1", activation="relu")
    hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2", activation="relu")
    logits = neuron_layer(hidden2, n_outputs, name="outpus")

In [6]:
# Loss function: cross-entropy between the integer labels and the logits,
# averaged over the instances that were fed.
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y,
        logits=logits,
    )
    loss = tf.reduce_mean(xentropy, name="loss")

In [7]:
learning_rate = 0.01  # step size used by the gradient descent optimizer

# Training op: one gradient descent step that minimises the loss.
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

In [8]:
# Accuracy: fraction of instances whose true label is the top-1 logit.
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

In [9]:
# Saver object used below to write and restore the model checkpoint.
saver = tf.train.Saver()
# Op that initialises every global variable of the graph.
init = tf.global_variables_initializer()

### 执行阶段

In [10]:
# Load the MNIST data through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Cast the images to float32, divide by 255 and flatten each one to 28*28 values.
X_train = (X_train.astype(np.float32) / 255.0).reshape(-1, 28 * 28)
X_test = (X_test.astype(np.float32) / 255.0).reshape(-1, 28 * 28)

# Labels are stored as int32.
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

# The first 5000 training instances become the validation set;
# the remainder stays as the training set.
X_valid, y_valid = X_train[:5000], y_train[:5000]
X_train, y_train = X_train[5000:], y_train[5000:]

In [11]:
# Training schedule.
n_epochs = 40    # number of passes over the training set
batch_size = 50  # training instances per mini-batch

def batch_generator(X, y, size):
    """Yield shuffled mini-batches that together cover (X, y) exactly once.

    Args:
        X: array of instances, indexed along axis 0.
        y: array of labels aligned with X along axis 0.
        size: target number of instances per mini-batch (positive integer).

    Yields:
        (X_batch, y_batch) pairs. The shuffled indices are split into
        len(X) // size nearly equal parts (at least one part), so when len(X)
        is not a multiple of `size` some batches hold slightly more than
        `size` instances. Nothing is yielded for an empty X.

    Raises:
        ValueError: if `size` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be a positive integer, got %r" % (size,))

    n_instances = len(X)  # length of axis 0
    if n_instances == 0:
        return

    # Random permutation of the indices 0 .. n_instances - 1.
    rnd_idx = np.random.permutation(n_instances)
    # Floor division; use the `size` argument (not a global) and never ask
    # np.array_split for zero sections when the data is smaller than one batch.
    n_batches = max(n_instances // size, 1)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        # `yield` turns this function into a generator that can be consumed
        # with a for loop or with next().
        yield X[batch_idx], y[batch_idx]
    

In [12]:
with tf.Session() as sess:
    # Initialise all variables before the first training step.
    sess.run(init)
    for epoch in range(n_epochs):
        # One epoch: run a training step on every mini-batch.
        for X_batch, y_batch in batch_generator(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Accuracy on the last mini-batch of the epoch and on the validation set.
        acc_batch = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accuracy:", acc_batch, "Val accuracy:", acc_val)

    # Persist the trained variables once training has finished.
    save_path = saver.save(sess, "./chpt10/my_model_final.ckpt")

0 Batch accuracy: 0.9 Val accuracy: 0.9146
1 Batch accuracy: 0.92 Val accuracy: 0.936
2 Batch accuracy: 0.96 Val accuracy: 0.945
3 Batch accuracy: 0.92 Val accuracy: 0.9512
4 Batch accuracy: 0.98 Val accuracy: 0.956
5 Batch accuracy: 0.96 Val accuracy: 0.9566
6 Batch accuracy: 1.0 Val accuracy: 0.9612
7 Batch accuracy: 0.94 Val accuracy: 0.963
8 Batch accuracy: 0.98 Val accuracy: 0.965
9 Batch accuracy: 0.96 Val accuracy: 0.966
10 Batch accuracy: 0.92 Val accuracy: 0.9688
11 Batch accuracy: 0.98 Val accuracy: 0.9692
12 Batch accuracy: 0.98 Val accuracy: 0.9666
13 Batch accuracy: 0.98 Val accuracy: 0.9706
14 Batch accuracy: 1.0 Val accuracy: 0.9714
15 Batch accuracy: 0.94 Val accuracy: 0.9734
16 Batch accuracy: 1.0 Val accuracy: 0.973
17 Batch accuracy: 1.0 Val accuracy: 0.9742
18 Batch accuracy: 1.0 Val accuracy: 0.975
19 Batch accuracy: 0.98 Val accuracy: 0.9742
20 Batch accuracy: 1.0 Val accuracy: 0.9752
21 Batch accuracy: 1.0 Val accuracy: 0.9756
22 Batch accuracy: 0.98 Val accuracy

In [13]:
with tf.Session() as sess:
    # Reload the variables saved by the training cell
    # (the save_path returned by saver.save could be used here instead).
    saver.restore(sess, "./chpt10/my_model_final.ckpt")
    X_new_scaled = X_test[:20]  # first 20 test instances
    Z = sess.run(logits, feed_dict={X: X_new_scaled})
    # The predicted class is the index of the largest logit in each row.
    y_pred = Z.argmax(axis=1)

print("Predicted classes:", y_pred)
print("Actual classes:   ", y_test[:20])

INFO:tensorflow:Restoring parameters from ./chpt10/my_model_final.ckpt
Predicted classes: [7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4]
Actual classes:    [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


In [14]:
# Export the default graph to ./chpt10_dnn as an event file.
file_writer = tf.summary.FileWriter("./chpt10_dnn", tf.get_default_graph())
# FileWriter writes asynchronously; flush so the graph event reaches disk now
# instead of waiting for the writer's periodic background flush.
file_writer.flush()